{"id":"https://openalex.org/W4414735868","doi":"https://doi.org/10.1145/3731569.3764855","title":"METIS: Fast Quality-Aware RAG Systems with Configuration Adaptation","display_name":"METIS: Fast Quality-Aware RAG Systems with Configuration Adaptation","publication_year":2025,"publication_date":"2025-10-01","ids":{"openalex":"https://openalex.org/W4414735868","doi":"https://doi.org/10.1145/3731569.3764855"},"language":"en","primary_location":{"id":"doi:10.1145/3731569.3764855","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731569.3764855","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3731569.3764855","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022254723","display_name":"Siddhant Ray","orcid":"https://orcid.org/0000-0003-0265-2144"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Siddhant Ray","raw_affiliation_strings":["University of Chicago, Chicago, USA"],"raw_orcid":"https://orcid.org/0000-0003-0265-2144","affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100638027","display_name":"Rui Pan","orcid":"https://orcid.org/0000-0002-6973-3259"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rui Pan","raw_affiliation_strings":["Princeton University, Princeton, USA"],"raw_orcid":"https://orcid.org/0000-0002-6973-3259","affiliations":[{"raw_affiliation_string":"Princeton University, Princeton, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038710809","display_name":"Zhuohan Gu","orcid":"https://orcid.org/0009-0005-1076-6549"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhuohan Gu","raw_affiliation_strings":["University of Chicago, Chicago, USA"],"raw_orcid":"https://orcid.org/0009-0005-1076-6549","affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036272233","display_name":"Kuntai Du","orcid":"https://orcid.org/0000-0002-3964-4079"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]},{"id":"https://openalex.org/I886932462","display_name":"Foster-Miller (United States)","ror":"https://ror.org/01wm02973","country_code":"US","type":"company","lineage":["https://openalex.org/I886932462"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kuntai Du","raw_affiliation_strings":["TensorMesh, Inc., Foster City, USA","University of Chicago, Chicago, USA"],"raw_orcid":"https://orcid.org/0000-0002-3964-4079","affiliations":[{"raw_affiliation_string":"TensorMesh, Inc., Foster City, USA","institution_ids":["https://openalex.org/I886932462"]},{"raw_affiliation_string":"University of Chicago, Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103526928","display_name":"Shaoting Feng","orcid":"https://orcid.org/0000-0003-3346-5165"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shaoting Feng","raw_affiliation_strings":["University of Chicago, Chicago, USA"],"raw_orcid":"https://orcid.org/0000-0003-3346-5165","affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031071237","display_name":"Ganesh Ananthanarayanan","orcid":"https://orcid.org/0000-0002-7479-1664"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ganesh Ananthanarayanan","raw_affiliation_strings":["Microsoft, Redmond, USA"],"raw_orcid":"https://orcid.org/0000-0002-7479-1664","affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053593890","display_name":"Ravi Netravali","orcid":"https://orcid.org/0000-0001-7002-5033"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ravi Netravali","raw_affiliation_strings":["Princeton University, Princeton, USA"],"raw_orcid":"https://orcid.org/0000-0001-7002-5033","affiliations":[{"raw_affiliation_string":"Princeton University, Princeton, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103258769","display_name":"Junchen Jiang","orcid":"https://orcid.org/0000-0002-6877-1683"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]},{"id":"https://openalex.org/I886932462","display_name":"Foster-Miller (United States)","ror":"https://ror.org/01wm02973","country_code":"US","type":"company","lineage":["https://openalex.org/I886932462"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junchen Jiang","raw_affiliation_strings":["TensorMesh, Inc., Foster City, USA","University of Chicago, Chicago, USA"],"raw_orcid":"https://orcid.org/0000-0002-6877-1683","affiliations":[{"raw_affiliation_string":"TensorMesh, Inc., Foster City, USA","institution_ids":["https://openalex.org/I886932462"]},{"raw_affiliation_string":"University of Chicago, Chicago, USA","institution_ids":["https://openalex.org/I40347166"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5022254723"],"corresponding_institution_ids":["https://openalex.org/I40347166"],"apc_list":null,"apc_paid":null,"fwci":5.9853,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.9631294,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"606","last_page":"622"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.980400025844574,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9781000018119812,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6890000104904175},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5763999819755554},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.40560001134872437},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.3982999920845032},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.37540000677108765},{"id":"https://openalex.org/keywords/metis","display_name":"Metis","score":0.31470000743865967}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7239000201225281},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6890000104904175},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5763999819755554},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.444599986076355},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.40560001134872437},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3982999920845032},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.37540000677108765},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.32019999623298645},{"id":"https://openalex.org/C2780705272","wikidata":"https://www.wikidata.org/wiki/Q19903554","display_name":"Metis","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.31299999356269836},{"id":"https://openalex.org/C154690210","wikidata":"https://www.wikidata.org/wiki/Q1668499","display_name":"Rewriting","level":2,"score":0.28049999475479126},{"id":"https://openalex.org/C19012869","wikidata":"https://www.wikidata.org/wiki/Q578372","display_name":"Response time","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C2985684807","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Text generation","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C168031717","wikidata":"https://www.wikidata.org/wiki/Q1530280","display_name":"Balance (ability)","level":2,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3731569.3764855","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731569.3764855","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3731569.3764855","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731569.3764855","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6296334437","display_name":null,"funder_award_id":"CNS 2146496","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1502957213","https://openalex.org/W2606555609","https://openalex.org/W2734941459","https://openalex.org/W2896457183","https://openalex.org/W2990138404","https://openalex.org/W3190126809","https://openalex.org/W4225576545","https://openalex.org/W4230872509","https://openalex.org/W4288253152","https://openalex.org/W4368755400","https://openalex.org/W4385571038","https://openalex.org/W4386576685","https://openalex.org/W4389518671","https://openalex.org/W4389519118","https://openalex.org/W4389519153","https://openalex.org/W4391871644","https://openalex.org/W4391876619","https://openalex.org/W4392930030","https://openalex.org/W4392971790","https://openalex.org/W4394778349","https://openalex.org/W4395443209","https://openalex.org/W4395686609","https://openalex.org/W4398808299","https://openalex.org/W4399252473","https://openalex.org/W4400104234","https://openalex.org/W4400141368","https://openalex.org/W4400267057","https://openalex.org/W4400267284","https://openalex.org/W4400377444","https://openalex.org/W4400611829","https://openalex.org/W4401042451","https://openalex.org/W4401042475","https://openalex.org/W4402502830","https://openalex.org/W4402502983","https://openalex.org/W4402703527","https://openalex.org/W4403347014","https://openalex.org/W4403365397","https://openalex.org/W4403443637","https://openalex.org/W4403590155","https://openalex.org/W4403752360","https://openalex.org/W4403885583","https://openalex.org/W4404088579","https://openalex.org/W4404313088","https://openalex.org/W4404314563","https://openalex.org/W4404341467","https://openalex.org/W4404351261","https://openalex.org/W4404354805","https://openalex.org/W4404400739","https://openalex.org/W4404405709","https://openalex.org/W4404432441","https://openalex.org/W4404783788","https://openalex.org/W4405033124","https://openalex.org/W4406542750","https://openalex.org/W4408848702","https://openalex.org/W4411403450","https://openalex.org/W4411486143","https://openalex.org/W4412945639","https://openalex.org/W4414371540"],"related_works":[],"abstract_inverted_index":{"RAG":[0,38,47,64,79,89,117],"(Retrieval":[1],"Augmented":[2],"Generation)":[3],"allows":[4],"LLMs":[5],"(large":[6],"language":[7],"models)":[8],"to":[9,114],"generate":[10],"better":[11,35],"responses":[12],"with":[13],"external":[14,19],"knowledge,":[15],"but":[16,49],"using":[17],"more":[18],"knowledge":[20],"causes":[21],"higher":[22],"response":[23,32,71],"delay.":[24],"Prior":[25],"work":[26],"focuses":[27],"either":[28],"on":[29,41],"reducing":[30],"the":[31,46,56,59,77,87,96,115,122],"delay":[33,60],"(e.g.,":[34,44],"scheduling":[36],"of":[37,63,91,98],"queries)":[39],"or":[40],"maximizing":[42],"quality":[43,62,69],"tuning":[45],"workflow),":[48],"they":[50],"fall":[51],"short":[52],"in":[53],"systematically":[54],"balancing":[55],"tradeoff":[57],"between":[58],"and":[61,70,85,102],"responses.":[65],"To":[66],"balance":[67],"both":[68],"delay,":[72],"this":[73],"paper":[74],"presents":[75],"METIS,":[76],"first":[78],"system":[80],"that":[81,112],"jointly":[82],"schedules":[83],"queries":[84],"adapts":[86],"key":[88],"configurations":[90],"each":[92],"query,":[93],"such":[94],"as":[95],"number":[97],"retrieved":[99],"text":[100],"chunks":[101],"synthesis":[103],"methods.":[104],"Using":[105],"four":[106],"popular":[107],"RAG-QA":[108],"datasets,":[109],"we":[110],"show":[111],"compared":[113],"state-of-the-art":[116],"optimization":[118],"schemes,":[119],"METIS":[120],"reduces":[121],"generation":[123,131],"latency":[124],"by":[125],"1.64":[126],"\u2013":[127],"2.54\u00d7":[128],"without":[129],"sacrificing":[130],"quality.":[132]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-19T08:33:51.333923","created_date":"2025-10-10T00:00:00"}
