{"id":"https://openalex.org/W7137824411","doi":"https://doi.org/10.1609/aaai.v40i38.40482","title":"ARBench: Algorithmic Reasoner or API Alchemist? Evaluating LLMs Beyond API Calls","display_name":"ARBench: Algorithmic Reasoner or API Alchemist? Evaluating LLMs Beyond API Calls","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137824411","doi":"https://doi.org/10.1609/aaai.v40i38.40482"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i38.40482","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40482","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i38.40482","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125715501","display_name":"Ren-Biao Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ren-Biao Liu","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125751834","display_name":"Chao-Zeng Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao-Zeng Ma","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129690797","display_name":"Anqi Li","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Anqi Li","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129641009","display_name":"Hui Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Sun","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129743220","display_name":"Xin-Ye Li","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin-Ye Li","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129647833","display_name":"Ming Li","orcid":null},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Li","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5125715501"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15384615,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"38","first_page":"32105","last_page":"32113"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.19359999895095825,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.19359999895095825,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.17919999361038208,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.08150000125169754,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semantic-reasoner","display_name":"Semantic reasoner","score":0.5751000046730042},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5146999955177307},{"id":"https://openalex.org/keywords/memorization","display_name":"Memorization","score":0.39989998936653137},{"id":"https://openalex.org/keywords/strengths-and-weaknesses","display_name":"Strengths and weaknesses","score":0.3452000021934509},{"id":"https://openalex.org/keywords/rote-learning","display_name":"Rote learning","score":0.32120001316070557},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.31470000743865967}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6819000244140625},{"id":"https://openalex.org/C9616225","wikidata":"https://www.wikidata.org/wiki/Q3929429","display_name":"Semantic reasoner","level":2,"score":0.5751000046730042},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5146999955177307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4207000136375427},{"id":"https://openalex.org/C30038468","wikidata":"https://www.wikidata.org/wiki/Q4354775","display_name":"Memorization","level":2,"score":0.39989998936653137},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.39800000190734863},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.3452000021934509},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3416999876499176},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33809998631477356},{"id":"https://openalex.org/C148324565","wikidata":"https://www.wikidata.org/wiki/Q784221","display_name":"Rote learning","level":4,"score":0.32120001316070557},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.31470000743865967},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3109000027179718},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2797999978065491},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i38.40482","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40482","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i38.40482","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40482","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"demonstrated":[5],"impressive":[6],"capabilities":[7],"in":[8,130],"code":[9],"generation.":[10],"Like":[11],"human":[12],"programmers,":[13],"LLMs":[14,30,48,129],"tend":[15],"to":[16,22,39,51,53,76,114,138],"call":[17],"high-level":[18,86],"APIs":[19],"and":[20,92,104,125,133,142],"libraries":[21],"program":[23],"efficiently.":[24],"However,":[25],"this":[26,64],"shortcut":[27],"may":[28],"hinder":[29],"from":[31,81,97,118],"learning":[32,79,107],"the":[33,123],"essential":[34],"algorithm":[35],"reasoning,":[36],"leading":[37],"instead":[38],"rote":[40],"memorization":[41],"of":[42,127],"API":[43,99],"usage.":[44,100],"As":[45],"a":[46,69],"result,":[47],"often":[49],"struggle":[50],"generalize":[52],"new":[54],"or":[55],"domain-specific":[56],"algorithms":[57,80,117],"that":[58],"lack":[59],"ready-made":[60],"library":[61],"support.":[62],"In":[63],"work,":[65],"we":[66],"propose":[67],"ARBench,":[68],"novel":[70],"benchmark":[71],"for":[72],"evaluating":[73],"LLMs\u2019":[74,112],"ability":[75],"generate":[77],"machine":[78,106],"scratch,":[82],"beyond":[83],"merely":[84],"invoking":[85],"APIs.":[87],"It":[88,101],"emphasizes":[89],"algorithmic":[90,131],"reasoning":[91,132],"implementation,":[93],"distinguishing":[94],"genuine":[95],"understanding":[96],"superficial":[98],"covers":[102],"fundamental":[103],"advanced":[105],"tasks,":[108],"rigorously":[109],"assessing":[110],"current":[111],"capacity":[113],"implement":[115],"these":[116],"scratch.":[119],"Our":[120],"evaluation":[121],"reveals":[122],"strengths":[124],"weaknesses":[126],"state-of-the-art":[128],"generalization,":[134],"offering":[135],"valuable":[136],"insights":[137],"guide":[139],"future":[140],"research":[141],"development.":[143]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
