{"id":"https://openalex.org/W7135014373","doi":"https://doi.org/10.48550/arxiv.2603.10765","title":"RAGPerf: An End-to-End Benchmarking Framework for Retrieval-Augmented Generation Systems","display_name":"RAGPerf: An End-to-End Benchmarking Framework for Retrieval-Augmented Generation Systems","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135014373","doi":"https://doi.org/10.48550/arxiv.2603.10765"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.10765","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10765","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.10765","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128849051","display_name":"Shaobo Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Shaobo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124166070","display_name":"Yirui Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Yirui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128810834","display_name":"Yuan Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128876660","display_name":"Kevin Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Kevin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033709518","display_name":"Daniel Waddington","orcid":"https://orcid.org/0000-0001-8758-910X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Waddington, Daniel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128904609","display_name":"Swaminathan Sundararaman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sundararaman, Swaminathan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038903691","display_name":"Hubertus Franke","orcid":"https://orcid.org/0009-0005-0150-1055"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Franke, Hubertus","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128812015","display_name":"Jian An Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.3671000003814697,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.3671000003814697,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.06040000170469284,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.05829999968409538,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7871999740600586},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6960999965667725},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5288000106811523},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.4494999945163727},{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.43220001459121704},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.40369999408721924},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.3853999972343445},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.3691999912261963},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.36250001192092896},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.34529998898506165}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8248000144958496},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7871999740600586},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6960999965667725},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5288000106811523},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.4494999945163727},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4375999867916107},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.43220001459121704},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.40369999408721924},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3912000060081482},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.3853999972343445},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.3691999912261963},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.36250001192092896},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.34529998898506165},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.34290000796318054},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.329800009727478},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.31679999828338623},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.31360000371932983},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2953999936580658},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.29429998993873596},{"id":"https://openalex.org/C2780945871","wikidata":"https://www.wikidata.org/wiki/Q194274","display_name":"Backup","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C96956885","wikidata":"https://www.wikidata.org/wiki/Q6138701","display_name":"RDF query language","level":5,"score":0.2574000060558319},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.2565999925136566},{"id":"https://openalex.org/C130590232","wikidata":"https://www.wikidata.org/wiki/Q1671754","display_name":"Inverted index","level":3,"score":0.2563999891281128},{"id":"https://openalex.org/C2776543023","wikidata":"https://www.wikidata.org/wiki/Q2147046","display_name":"Rework","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.10765","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10765","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.10765","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10765","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,151],"present":[1],"the":[2,16,32,48,54,65,125,153],"design":[3],"and":[4,26,44,60,69,88,92,95,113,137,140,148,163],"implementation":[5],"of":[6,19,57,127,155,161],"a":[7,73,158],"RAG-based":[8],"AI":[9],"system":[10,17],"benchmarking":[11],"(RAGPerf)":[12],"framework":[13],"for":[14,50,120],"characterizing":[15],"behaviors":[18],"RAG":[20,33],"pipelines.":[21],"To":[22],"facilitate":[23],"detailed":[24],"profiling":[25],"fine-grained":[27],"performance":[28,68,128,177],"analysis,":[29],"RAGPerf":[30,46,71,98,156,174],"decouples":[31],"workflow":[34],"into":[35],"several":[36],"modular":[37],"components":[38],"-":[39],"embedding,":[40],"indexing,":[41],"retrieval,":[42],"reranking,":[43],"generation.":[45,122],"offers":[47],"flexibility":[49],"users":[51],"to":[52,76],"configure":[53],"core":[55],"parameters":[56],"each":[58],"component":[59],"examine":[61],"their":[62],"impact":[63],"on":[64],"end-to-end":[66,131],"query":[67,96,132,146],"quality.":[70],"has":[72],"workload":[74],"generator":[75],"model":[77],"real-world":[78],"scenarios":[79],"by":[80],"supporting":[81],"diverse":[82],"datasets":[83],"(e.g.,":[84],"text,":[85],"pdf,":[86],"code,":[87],"audio),":[89],"different":[90,101,118],"retrieval":[91],"update":[93],"ratios,":[94],"distributions.":[97],"also":[99],"supports":[100],"embedding":[102],"models,":[103],"major":[104],"vector":[105],"databases":[106],"such":[107],"as":[108,115,117],"LanceDB,":[109],"Milvus,":[110],"Qdrant,":[111],"Chroma,":[112],"Elasticsearch,":[114],"well":[116],"LLMs":[119],"content":[121],"It":[123],"automates":[124],"collection":[126],"metrics":[129,142],"(i.e.,":[130,143],"throughput,":[133],"host/GPU":[134],"memory":[135],"footprint,":[136],"CPU/GPU":[138],"utilization)":[139],"accuracy":[141],"context":[144],"recall,":[145],"accuracy,":[147],"factual":[149],"consistency).":[150],"demonstrate":[152],"capabilities":[154],"through":[157],"comprehensive":[159],"set":[160],"experiments":[162],"open":[164],"source":[165],"its":[166],"codebase":[167],"at":[168],"GitHub.":[169],"Our":[170],"evaluation":[171],"shows":[172],"that":[173],"incurs":[175],"negligible":[176],"overhead.":[178]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-03-13T00:00:00"}
