{"id":"https://openalex.org/W7152741289","doi":"https://doi.org/10.48550/arxiv.2604.06793","title":"Evaluating Repository-level Software Documentation via Question Answering and Feature-Driven Development","display_name":"Evaluating Repository-level Software Documentation via Question Answering and Feature-Driven Development","publication_year":2026,"publication_date":"2026-04-08","ids":{"openalex":"https://openalex.org/W7152741289","doi":"https://doi.org/10.48550/arxiv.2604.06793"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.06793","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06793","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.06793","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133241957","display_name":"Xinchen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Xinchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133272099","display_name":"Ruida Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Ruida","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133261643","display_name":"Cuiyun Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Cuiyun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133295918","display_name":"Pengfei Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Pengfei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133299201","display_name":"Chao Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Chao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5133241957"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9151999950408936,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9151999950408936,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.009999999776482582,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.009700000286102295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.9229000210762024},{"id":"https://openalex.org/keywords/software-documentation","display_name":"Software documentation","score":0.6446999907493591},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5426999926567078},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5109000205993652},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5078999996185303},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5005999803543091},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.46309998631477356},{"id":"https://openalex.org/keywords/internal-documentation","display_name":"Internal documentation","score":0.42179998755455017}],"concepts":[{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.9229000210762024},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7731999754905701},{"id":"https://openalex.org/C81587897","wikidata":"https://www.wikidata.org/wiki/Q181702","display_name":"Software documentation","level":5,"score":0.6446999907493591},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5426999926567078},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5376999974250793},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5109000205993652},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5078999996185303},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5005999803543091},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.46309998631477356},{"id":"https://openalex.org/C140396857","wikidata":"https://www.wikidata.org/wiki/Q16934771","display_name":"Internal documentation","level":5,"score":0.42179998755455017},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.421099990606308},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.41940000653266907},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.38589999079704285},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.37770000100135803},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.3479999899864197},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34389999508857727},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.3160000145435333},{"id":"https://openalex.org/C25688753","wikidata":"https://www.wikidata.org/wiki/Q1413406","display_name":"Technical documentation","level":3,"score":0.3118000030517578},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.26440000534057617},{"id":"https://openalex.org/C82214349","wikidata":"https://www.wikidata.org/wiki/Q657339","display_name":"Software metric","level":5,"score":0.25270000100135803},{"id":"https://openalex.org/C48002344","wikidata":"https://www.wikidata.org/wiki/Q2919644","display_name":"Verification and validation","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.06793","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06793","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.06793","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06793","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Software":[0],"documentation":[1,13,76,167,179,199],"is":[2,98,120],"crucial":[3],"for":[4,64],"repository":[5],"comprehension.":[6],"While":[7],"Large":[8],"Language":[9],"Models":[10],"(LLMs)":[11],"advance":[12],"generation":[14,168],"from":[15,47,180],"code":[16,174],"snippets":[17],"to":[18,83,115,125,137],"entire":[19],"repositories,":[20],"existing":[21],"benchmarks":[22],"have":[23],"two":[24],"key":[25],"limitations:":[26],"(1)":[27,112],"they":[28,36],"lack":[29],"a":[30,61,118],"holistic,":[31],"repository-level":[32,52,66,160],"assessment,":[33],"and":[34,50,85,133,156,170],"(2)":[35,122],"rely":[37],"on":[38],"unreliable":[39],"evaluation":[40],"strategies,":[41],"such":[42],"as":[43],"LLM-as-a-judge,":[44],"which":[45,192],"suffers":[46],"vague":[48],"criteria":[49],"limited":[51],"knowledge.":[53],"To":[54],"address":[55],"these":[56],"issues,":[57],"we":[58],"introduce":[59],"SWD-Bench,":[60],"novel":[62],"benchmark":[63],"evaluating":[65],"software":[67],"documentation.":[68],"Inspired":[69],"by":[70,78,93,151,190],"documentation-driven":[71,202],"development,":[72],"our":[73],"strategy":[74],"evaluates":[75],"quality":[77],"assessing":[79],"an":[80],"LLM's":[81],"ability":[82],"understand":[84],"implement":[86],"functionalities":[87],"using":[88],"the":[89,127,139,146,181,185,194],"documentation,":[90],"rather":[91],"than":[92],"directly":[94],"scoring":[95],"it.":[96],"This":[97],"measured":[99],"through":[100],"function-driven":[101],"Question":[102],"Answering":[103],"(QA)":[104],"tasks.":[105],"SWD-Bench":[106],"comprises":[107],"three":[108],"interconnected":[109],"QA":[110],"tasks:":[111],"Functionality":[113,123,135],"Detection,":[114],"determine":[116],"if":[117],"functionality":[119],"described;":[121],"Localization,":[124],"evaluate":[126],"accuracy":[128],"of":[129,141,188,197],"locating":[130],"related":[131],"files;":[132],"(3)":[134],"Completion,":[136],"measure":[138],"comprehensiveness":[140],"implementation":[142],"details.":[143],"We":[144],"construct":[145],"benchmark,":[147],"containing":[148],"4,170":[149],"entries,":[150],"mining":[152],"high-quality":[153,198],"Pull":[154],"Requests":[155],"enriching":[157],"them":[158],"with":[159],"context.":[161],"Experiments":[162],"reveal":[163],"limitations":[164],"in":[165,200],"current":[166],"methods":[169],"show":[171],"that":[172],"source":[173],"provides":[175],"complementary":[176],"value.":[177],"Notably,":[178],"best-performing":[182],"method":[183],"improves":[184],"issue-solving":[186],"rate":[187],"SWE-Agent":[189],"20.00%,":[191],"demonstrates":[193],"practical":[195],"value":[196],"supporting":[201],"development.":[203]},"counts_by_year":[],"updated_date":"2026-04-10T06:07:51.998497","created_date":"2026-04-10T00:00:00"}
