{"id":"https://openalex.org/W4394782236","doi":"https://doi.org/10.48550/arxiv.2404.07940","title":"InfiBench: Evaluating the Question-Answering Capabilities of Code Large Language Models","display_name":"InfiBench: Evaluating the Question-Answering Capabilities of Code Large Language Models","publication_year":2024,"publication_date":"2024-03-11","ids":{"openalex":"https://openalex.org/W4394782236","doi":"https://doi.org/10.48550/arxiv.2404.07940"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2404.07940","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.07940","pdf_url":"https://arxiv.org/pdf/2404.07940","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2404.07940","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075538321","display_name":"Linyi Li","orcid":"https://orcid.org/0000-0002-2185-8407"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Linyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080973846","display_name":"Shijie Geng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Geng, Shijie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001648898","display_name":"Zhenwen Li","orcid":"https://orcid.org/0000-0002-1601-8403"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zhenwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101865953","display_name":"Yibo He","orcid":"https://orcid.org/0000-0002-6306-0647"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Yibo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100432177","display_name":"Hao Yu","orcid":"https://orcid.org/0000-0002-2779-9145"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113115887","display_name":"Ziyue Hua","orcid":"https://orcid.org/0009-0007-8348-5232"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hua, Ziyue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082349270","display_name":"Guanghan Ning","orcid":"https://orcid.org/0000-0002-4356-7862"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ning, Guanghan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100656645","display_name":"Siwei Wang","orcid":"https://orcid.org/0000-0001-9517-262X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Siwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048118068","display_name":"Tao Xie","orcid":"https://orcid.org/0000-0002-6731-216X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Tao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5082599714","display_name":"Hongxia Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Hongxia","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5075538321"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9664000272750854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6611434817314148},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6174106001853943},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5740112066268921},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.545743465423584},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2904232144355774},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.12459611892700195}],"concepts":[{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6611434817314148},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6174106001853943},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5740112066268921},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.545743465423584},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2904232144355774},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.12459611892700195},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2404.07940","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.07940","pdf_url":"https://arxiv.org/pdf/2404.07940","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2404.07940","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2404.07940","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2404.07940","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.07940","pdf_url":"https://arxiv.org/pdf/2404.07940","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.44999998807907104}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4394782236.pdf","grobid_xml":"https://content.openalex.org/works/W4394782236.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W1667647204","https://openalex.org/W2404647514","https://openalex.org/W4247536566","https://openalex.org/W2018477250","https://openalex.org/W3119814709","https://openalex.org/W4241418540","https://openalex.org/W1508895727","https://openalex.org/W2725786787"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"for":[3,87,125,133,156,179],"code":[4,19,38,45,61,66,88,137,160,180],"(code":[5],"LLMs)":[6],"have":[7,31],"witnessed":[8],"tremendous":[9],"progress":[10],"in":[11],"recent":[12],"years.":[13],"With":[14],"the":[15,35,54,80,123],"rapid":[16],"development":[17],"of":[18,37,57,60,110,145,159],"LLMs,":[20,62],"many":[21],"popular":[22],"evaluation":[23,132],"benchmarks,":[24],"such":[25],"as":[26],"HumanEval,":[27],"DS-1000,":[28],"and":[29,147,169,176],"MBPP,":[30],"emerged":[32],"to":[33,52,68,89,114,142,172],"measure":[34],"performance":[36],"LLMs":[39,138],"with":[40],"a":[41,130,143],"particular":[42],"focus":[43],"on":[44,139],"generation":[46,67],"tasks.":[47],"However,":[48],"they":[49],"are":[50],"insufficient":[51],"cover":[53],"full":[55],"range":[56],"expected":[58],"capabilities":[59],"which":[63],"span":[64,101],"beyond":[65],"answering":[69],"diverse":[70],"coding-related":[71],"questions.":[72],"To":[73],"fill":[74],"this":[75],"gap,":[76],"we":[77],"propose":[78],"InfiBench,":[79,140],"first":[81],"large-scale":[82],"freeform":[83],"question-answering":[84],"(QA)":[85],"benchmark":[86],"our":[90],"knowledge,":[91],"comprising":[92],"234":[93],"carefully":[94,121],"selected":[95],"high-quality":[96],"Stack":[97],"Overflow":[98],"questions":[99],"that":[100],"across":[102],"15":[103],"programming":[104],"languages.":[105],"InfiBench":[106,162],"uses":[107],"four":[108],"types":[109],"model-free":[111],"automatic":[112],"metrics":[113],"evaluate":[115],"response":[116],"correctness":[117],"where":[118],"domain":[119],"experts":[120],"concretize":[122],"criterion":[124],"each":[126],"question.":[127],"We":[128],"conduct":[129],"systematic":[131,177],"over":[134],"100":[135],"latest":[136],"leading":[141],"series":[144],"novel":[146],"insightful":[148],"findings.":[149],"Our":[150],"detailed":[151],"analyses":[152],"showcase":[153],"potential":[154],"directions":[155],"further":[157],"advancement":[158],"LLMs.":[161],"is":[163],"fully":[164],"open":[165],"source":[166],"at":[167],"https://infi-coder.github.io/infibench":[168],"continuously":[170],"expanding":[171],"foster":[173],"more":[174],"scientific":[175],"practices":[178],"LLM":[181],"evaluation.":[182]},"counts_by_year":[],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2024-04-13T00:00:00"}
