{"id":"https://openalex.org/W4415480619","doi":"https://doi.org/10.1145/3704413.3764419","title":"DRAGON: Enhancing On-Device Model Performance with Distributed Retrieval-Augmented Generation","display_name":"DRAGON: Enhancing On-Device Model Performance with Distributed Retrieval-Augmented Generation","publication_year":2025,"publication_date":"2025-10-23","ids":{"openalex":"https://openalex.org/W4415480619","doi":"https://doi.org/10.1145/3704413.3764419"},"language":null,"primary_location":{"id":"doi:10.1145/3704413.3764419","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704413.3764419","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704413.3764419","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-sixth International Symposium on Theory, Algorithmic Foundations, and Protocol Design for Mobile Networks and Mobile Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3704413.3764419","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025037085","display_name":"Shangyu Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shangyu Liu","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050396482","display_name":"Zhenzhe Zheng","orcid":"https://orcid.org/0000-0003-3447-5349"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenzhe Zheng","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031124981","display_name":"Xiaoyao Huang","orcid":"https://orcid.org/0000-0003-2571-1979"},"institutions":[{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]},{"id":"https://openalex.org/I4210144487","display_name":"Cloud Computing Center","ror":"https://ror.org/04aa0zm65","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210144487"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyao Huang","raw_affiliation_strings":["Cloud Computing Research Institute, China Telecom, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Cloud Computing Research Institute, China Telecom, Beijing, China","institution_ids":["https://openalex.org/I4210144487","https://openalex.org/I4210136246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059190563","display_name":"Fan Wu","orcid":"https://orcid.org/0000-0003-0965-9058"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Wu","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100428808","display_name":"Guihai Chen","orcid":"https://orcid.org/0000-0002-6934-1685"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guihai Chen","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100600528","display_name":"Jie Wu","orcid":"https://orcid.org/0000-0002-3472-1717"},"institutions":[{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]},{"id":"https://openalex.org/I4210144487","display_name":"Cloud Computing Center","ror":"https://ror.org/04aa0zm65","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210144487"]},{"id":"https://openalex.org/I84392919","display_name":"Temple University","ror":"https://ror.org/00kx1jb78","country_code":"US","type":"education","lineage":["https://openalex.org/I84392919"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Jie Wu","raw_affiliation_strings":["Cloud Computing Research Institute, China Telecom, Beijing, China","Temple University, Philadelphia, Pennsylvania, USA"],"affiliations":[{"raw_affiliation_string":"Cloud Computing Research Institute, China Telecom, Beijing, China","institution_ids":["https://openalex.org/I4210144487","https://openalex.org/I4210136246"]},{"raw_affiliation_string":"Temple University, Philadelphia, Pennsylvania, USA","institution_ids":["https://openalex.org/I84392919"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5025037085"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29478708,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"221","last_page":"230"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12238","display_name":"Green IT and Sustainability","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12238","display_name":"Green IT and Sustainability","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/testbed","display_name":"Testbed","score":0.7958999872207642},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.7368999719619751},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6353999972343445},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5916000008583069},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.4896000027656555},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.4586000144481659},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.45260000228881836},{"id":"https://openalex.org/keywords/distributed-database","display_name":"Distributed database","score":0.3682999908924103},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.3582000136375427}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8004999756813049},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.7958999872207642},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.7368999719619751},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6395999789237976},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6353999972343445},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5916000008583069},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.4896000027656555},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.4586000144481659},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.45260000228881836},{"id":"https://openalex.org/C70061542","wikidata":"https://www.wikidata.org/wiki/Q989016","display_name":"Distributed database","level":2,"score":0.3682999908924103},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.35760000348091125},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.3472999930381775},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3458000123500824},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.3424000144004822},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.34060001373291016},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.3294999897480011},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.31520000100135803},{"id":"https://openalex.org/C115067241","wikidata":"https://www.wikidata.org/wiki/Q1639854","display_name":"Token passing","level":3,"score":0.3131999969482422},{"id":"https://openalex.org/C130120984","wikidata":"https://www.wikidata.org/wiki/Q2835898","display_name":"Distributed algorithm","level":2,"score":0.3027999997138977},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C123201435","wikidata":"https://www.wikidata.org/wiki/Q456632","display_name":"Information privacy","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.2533000111579895},{"id":"https://openalex.org/C99221444","wikidata":"https://www.wikidata.org/wiki/Q1532069","display_name":"Private information retrieval","level":2,"score":0.2524000108242035}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3704413.3764419","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704413.3764419","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704413.3764419","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-sixth International Symposium on Theory, Algorithmic Foundations, and Protocol Design for Mobile Networks and Mobile Computing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3704413.3764419","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3704413.3764419","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3704413.3764419","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-sixth International Symposium on Theory, Algorithmic Foundations, and Protocol Design for Mobile Networks and Mobile Computing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G133328775","display_name":null,"funder_award_id":"62272307","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1746474507","display_name":null,"funder_award_id":"U2268204","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G20903679","display_name":null,"funder_award_id":"2322206","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2436440598","display_name":null,"funder_award_id":"62322206","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4806219103","display_name":"RII Track-4: Superparamagnetic Iron Oxide Nanoparticles as Recoverable Microwave Susceptors for Pre-hydrolysis of Waste Activated Sludge prior to Anaerobic Digestion","funder_award_id":"2132018","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5894582779","display_name":null,"funder_award_id":"62132018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6775322962","display_name":null,"funder_award_id":"62372296","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6970794145","display_name":null,"funder_award_id":"62025204","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415480619.pdf","grobid_xml":"https://content.openalex.org/works/W4415480619.grobid-xml"},"referenced_works_count":16,"referenced_works":["https://openalex.org/W2129636357","https://openalex.org/W2340309946","https://openalex.org/W2970641574","https://openalex.org/W2995022099","https://openalex.org/W3049640275","https://openalex.org/W3099700870","https://openalex.org/W3138562521","https://openalex.org/W3172857592","https://openalex.org/W4388778348","https://openalex.org/W4389518901","https://openalex.org/W4389519118","https://openalex.org/W4393147129","https://openalex.org/W4401536895","https://openalex.org/W4401863300","https://openalex.org/W4402672007","https://openalex.org/W4404783889"],"related_works":[],"abstract_inverted_index":{"Small":[0],"language":[1],"models":[2],"(SLMs)":[3],"support":[4],"efficient":[5],"deployments":[6],"on":[7,51,108,149,154],"resource-constrained":[8],"edge":[9],"devices,":[10],"but":[11],"their":[12],"limited":[13],"capacity":[14],"compromises":[15],"inference":[16],"performance.":[17],"Retrieval-augmented":[18],"generation":[19,102],"(RAG)":[20],"is":[21,139],"a":[22,72,116,121,159],"promising":[23],"solution":[24],"to":[25,76,125,142,165,173,185],"enhance":[26,77],"model":[27,37],"performance":[28,161],"by":[29],"integrating":[30],"external":[31],"databases,":[32],"without":[33,86],"requiring":[34],"intensive":[35],"on-device":[36,78],"retraining.":[38],"However,":[39],"large-scale":[40],"public":[41],"databases":[42],"and":[43,54,83,106,111,114,133,182],"user-specific":[44],"private":[45],"contextual":[46],"documents":[47],"are":[48,62],"typically":[49],"located":[50],"the":[52,55,87,109,112,131,144,174],"cloud":[53,110,132],"device,":[56,113],"respectively,":[57],"while":[58],"existing":[59],"RAG":[60,74,97],"implementations":[61],"primarily":[63],"centralized.":[64],"To":[65],"bridge":[66],"this":[67],"gap,":[68],"we":[69],"propose":[70],"DRAGON,":[71],"distributed":[73],"framework":[75],"SLMs":[79],"through":[80],"both":[81],"general":[82],"personal":[84],"knowledge":[85],"risk":[88],"of":[89,163],"leaking":[90],"document":[91],"privacy.":[92],"Specifically,":[93],"DRAGON":[94],"decomposes":[95],"multi-document":[96],"into":[98],"multiple":[99],"parallel":[100],"token":[101],"processes":[103],"performed":[104],"independently":[105],"locally":[107],"employs":[115],"newly":[117],"designed":[118],"Speculative":[119],"Aggregation,":[120],"dual-side":[122],"speculative":[123],"algorithm":[124,138],"avoid":[126],"frequent":[127],"output":[128],"synchronization":[129],"between":[130],"device.":[134],"A":[135],"new":[136],"scheduling":[137],"further":[140],"introduced":[141],"identify":[143],"optimal":[145],"aggregation":[146],"side":[147],"based":[148],"real-time":[150],"network":[151],"conditions.":[152],"Evaluations":[153],"real-world":[155],"hardware":[156],"testbed":[157],"demonstrate":[158],"significant":[160],"improvement":[162],"DRAGON\u2014up":[164],"1.9X":[166],"greater":[167],"gains":[168],"over":[169],"standalone":[170],"SLM":[171],"compared":[172],"centralized":[175],"RAG,":[176],"substantial":[177],"reduction":[178],"in":[179],"per-token":[180],"latency,":[181],"negligible":[183],"Time":[184],"First":[186],"Token":[187],"(TTFT)":[188],"overhead.":[189]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-23T00:00:00"}
