{"id":"https://openalex.org/W7084090392","doi":"https://doi.org/10.1109/infocom55648.2025.11044685","title":"AdaRAG: Adaptive Optimization for Retrieval Augmented Generation with Multilevel Retrievers at the Edge","display_name":"AdaRAG: Adaptive Optimization for Retrieval Augmented Generation with Multilevel Retrievers at the Edge","publication_year":2025,"publication_date":"2025-05-19","ids":{"openalex":"https://openalex.org/W7084090392","doi":"https://doi.org/10.1109/infocom55648.2025.11044685"},"language":"en","primary_location":{"id":"doi:10.1109/infocom55648.2025.11044685","is_oa":false,"landing_page_url":"https://doi.org/10.1109/infocom55648.2025.11044685","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE INFOCOM 2025 - IEEE Conference on Computer Communications","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tao Ouyang","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tao Ouyang","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Guihang Hong","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guihang Hong","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kongyange Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kongyange Zhao","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhi Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi Zhou","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Weigang Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weigang Wu","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhaobiao Lv","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaobiao Lv","raw_affiliation_strings":["China United Network Telecommunications Corporation Limited,China"],"affiliations":[{"raw_affiliation_string":"China United Network Telecommunications Corporation Limited,China","institution_ids":["https://openalex.org/I4210136246"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xu Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Chen","raw_affiliation_strings":["School of Computer Science and Engineering, Sun Yat-sen University,China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Sun Yat-sen University,China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":0.8565,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.8013239,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11515","display_name":"Bacillus and Francisella bacterial research","score":0.3481000065803528,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11515","display_name":"Bacillus and Francisella bacterial research","score":0.3481000065803528,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.12600000202655792,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10593","display_name":"Bacterial biofilms and quorum sensing","score":0.10999999940395355,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6226999759674072},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5598000288009644},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.5307000279426575},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4438000023365021},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.43849998712539673},{"id":"https://openalex.org/keywords/randomness","display_name":"Randomness","score":0.4381999969482422},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.42419999837875366},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.388700008392334},{"id":"https://openalex.org/keywords/adaptive-optimization","display_name":"Adaptive optimization","score":0.3862999975681305}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.842199981212616},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6226999759674072},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5598000288009644},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.5307000279426575},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4438000023365021},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.43849998712539673},{"id":"https://openalex.org/C125112378","wikidata":"https://www.wikidata.org/wiki/Q176640","display_name":"Randomness","level":2,"score":0.4381999969482422},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.42719998955726624},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.42419999837875366},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.388700008392334},{"id":"https://openalex.org/C149672232","wikidata":"https://www.wikidata.org/wiki/Q337048","display_name":"Adaptive optimization","level":2,"score":0.3862999975681305},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3763999938964844},{"id":"https://openalex.org/C2778049539","wikidata":"https://www.wikidata.org/wiki/Q17002908","display_name":"Bayesian optimization","level":2,"score":0.32429999113082886},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.32190001010894775},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.3215000033378601},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30070000886917114},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.29319998621940613},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.2921000123023987},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29100000858306885},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C123201435","wikidata":"https://www.wikidata.org/wiki/Q456632","display_name":"Information privacy","level":2,"score":0.2818000018596649},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.27480000257492065},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/infocom55648.2025.11044685","is_oa":false,"landing_page_url":"https://doi.org/10.1109/infocom55648.2025.11044685","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE INFOCOM 2025 - IEEE Conference on Computer Communications","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Considering":[0],"privacy":[1],"concerns":[2],"and":[3,27,84,92,98,109,118,134,149,163,169,200,204,214],"real-time":[4,192],"demands":[5],"of":[6,35,145,229],"popular":[7],"large":[8],"language":[9],"models":[10],"(LLMs),":[11],"a":[12,176,186],"shift":[13],"towards":[14],"edge-based":[15,78,233],"LLM":[16,147,234],"inference":[17,99,148],"leverages":[18],"edge":[19,67,105],"clusters":[20],"in":[21,65,121,231],"proximity":[22],"to":[23,61,87,125,141,173,184,195],"provide":[24],"low":[25],"latency":[26,135,168],"secure":[28],"responsiveness.":[29],"To":[30,69],"enhance":[31],"the":[32,114,127,142,154,156,160,197,202,208,227],"generation":[33,38,132],"quality":[34,133],"LLMs,":[36],"retrieval-augmented":[37],"(RAG)":[39],"can":[40,58,225],"seamlessly":[41],"integrate":[42],"relevant":[43],"external":[44],"knowledge":[45],"from":[46],"local":[47],"databases":[48],"into":[49],"LLMs":[50],"without":[51],"dedicated":[52],"fine-tuning.":[53],"However,":[54],"this":[55,71],"retrieval":[56,90,97,116,203],"process":[57],"significantly":[59],"contribute":[60],"overall":[62,131],"latency,":[63],"particularly":[64],"resource-constrained":[66],"environments.":[68],"address":[70],"challenge,":[72],"we":[73,179],"introduce":[74],"AdaRAG,":[75],"tailored":[76],"for":[77,96,136],"RAG,":[79],"leveraging":[80],"multilevel":[81],"(i.e.,":[82,107,166],"light":[83],"heavy)":[85],"retrievers":[86],"facilitate":[88],"adaptive":[89],"granularity":[91],"efficient":[93],"pipeline":[94],"parallelism":[95],"processes":[100],"by":[101],"fully":[102],"exploiting":[103],"heterogeneous":[104],"resources":[106],"CPU":[108],"GPU).":[110],"AdaRAG":[111,230],"adaptively":[112],"manages":[113],"heavy":[115],"proportion":[117],"selected":[119],"documents":[120],"augmented":[122],"prompts,":[123],"aiming":[124],"balance":[126],"long-term":[128],"trade-off":[129],"between":[130,159],"dynamic":[137,151],"user":[138],"queries.":[139],"Due":[140],"inherent":[143],"randomness":[144],"probabilistic":[146],"highly":[150],"queries":[152],"at":[153],"edge,":[155],"underlying":[157],"relations":[158],"above":[161],"decisions":[162,206],"performance":[164,193],"feedback":[165,194],"end-to-end":[167],"accuracy)":[170],"are":[171],"difficult":[172],"obtain":[174],"accurately":[175],"priori.":[177],"Thus,":[178],"adopt":[180],"bandit":[181],"convex":[182],"optimization":[183],"design":[185],"lightweight":[187],"online":[188],"algorithm,":[189],"which":[190],"utilizes":[191],"estimate":[196],"gradient":[198],"information":[199],"optimize":[201],"prompt":[205],"on":[207],"fly.":[209],"Our":[210],"rigorous":[211],"theoretical":[212],"analysis":[213],"extensive":[215],"evaluations":[216],"show":[217],"our":[218],"AdaRAG's":[219],"superior":[220],"performance.":[221],"These":[222],"promising":[223],"results":[224],"boost":[226],"adoption":[228],"future":[232],"applications.":[235]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
