{"id":"https://openalex.org/W4402442372","doi":"https://doi.org/10.1145/3650212.3680399","title":"Calico: Automated Knowledge Calibration and Diagnosis for Elevating AI Mastery in Code Tasks","display_name":"Calico: Automated Knowledge Calibration and Diagnosis for Elevating AI Mastery in Code Tasks","publication_year":2024,"publication_date":"2024-09-11","ids":{"openalex":"https://openalex.org/W4402442372","doi":"https://doi.org/10.1145/3650212.3680399"},"language":"en","primary_location":{"id":"doi:10.1145/3650212.3680399","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3650212.3680399","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3650212.3680399","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100954623","display_name":"Yuxin Qiu","orcid":null},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuxin Qiu","raw_affiliation_strings":["University of California at Riverside, Riverside, USA"],"affiliations":[{"raw_affiliation_string":"University of California at Riverside, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093868949","display_name":"Jie Hu","orcid":"https://orcid.org/0009-0009-3527-7396"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jie Hu","raw_affiliation_strings":["University of California at Riverside, Riverside, USA"],"affiliations":[{"raw_affiliation_string":"University of California at Riverside, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011473115","display_name":"Qian Zhang","orcid":"https://orcid.org/0000-0002-1721-0907"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qian Zhang","raw_affiliation_strings":["University of California at Riverside, Riverside, USA"],"affiliations":[{"raw_affiliation_string":"University of California at Riverside, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073376805","display_name":"Heng Yin","orcid":"https://orcid.org/0000-0002-8942-7742"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heng Yin","raw_affiliation_strings":["University of California at Riverside, Riverside, USA"],"affiliations":[{"raw_affiliation_string":"University of California at Riverside, Riverside, USA","institution_ids":["https://openalex.org/I103635307"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100954623"],"corresponding_institution_ids":["https://openalex.org/I103635307"],"apc_list":null,"apc_paid":null,"fwci":2.5777,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.91520277,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1785","last_page":"1797"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.7246050834655762},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6954071521759033},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6191578507423401},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44071441888809204},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3835335373878479},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3757816255092621},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.3601571321487427},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.09614849090576172},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0654459297657013}],"concepts":[{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.7246050834655762},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6954071521759033},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6191578507423401},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44071441888809204},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3835335373878479},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3757816255092621},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3601571321487427},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.09614849090576172},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0654459297657013},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3650212.3680399","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3650212.3680399","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3650212.3680399","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3650212.3680399","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W2098664130","https://openalex.org/W2101105183","https://openalex.org/W2101720091","https://openalex.org/W2121081915","https://openalex.org/W2121217767","https://openalex.org/W2146957318","https://openalex.org/W2159725273","https://openalex.org/W2166993820","https://openalex.org/W2170224888","https://openalex.org/W2171383742","https://openalex.org/W2294980783","https://openalex.org/W2767798196","https://openalex.org/W2898480012","https://openalex.org/W2955127311","https://openalex.org/W2964150020","https://openalex.org/W2964194820","https://openalex.org/W2972082064","https://openalex.org/W2972135640","https://openalex.org/W3098605233","https://openalex.org/W3151395068","https://openalex.org/W3161903544","https://openalex.org/W3182546273","https://openalex.org/W3185341429","https://openalex.org/W3193977407","https://openalex.org/W3214404698","https://openalex.org/W4284709233","https://openalex.org/W4297943933","https://openalex.org/W4308643152","https://openalex.org/W4308731473","https://openalex.org/W4378591002","https://openalex.org/W4382246105","https://openalex.org/W4384154639","https://openalex.org/W4384304865","https://openalex.org/W4384345708","https://openalex.org/W4384345728","https://openalex.org/W4384345745","https://openalex.org/W4385302156","https://openalex.org/W4388483128","https://openalex.org/W4389158474","https://openalex.org/W4389158722","https://openalex.org/W4391558438","https://openalex.org/W4391579639","https://openalex.org/W4391579642","https://openalex.org/W4391724785"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W2114770238","https://openalex.org/W4224009465","https://openalex.org/W2365607528","https://openalex.org/W2046724649","https://openalex.org/W2555586164","https://openalex.org/W4306674287","https://openalex.org/W2030747924","https://openalex.org/W4286629047","https://openalex.org/W1499374895"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,11,29,96,154,200,252],"large":[3],"language":[4],"models":[5,48],"(LLMs)":[6],"have":[7,204],"exhibited":[8],"promising":[9],"capabilities":[10],"addressing":[12],"various":[13],"tasks":[14,45],"such":[15],"as":[16,145],"defect":[17],"detection":[18],"and":[19,39,62,81,116,135,182,238],"program":[20,156,253],"repair.":[21],"Despite":[22],"their":[23,41],"prevalence,":[24],"LLMs":[25,95,199],"still":[26],"face":[27],"limitations":[28],"effectively":[30],"handling":[31],"these":[32],"tasks.":[33,99,202,217],"Common":[34],"strategies":[35],"to":[36,72,103,164,185,194,243,249],"adapt":[37],"them":[38],"improve":[40,195],"performance":[42,197],"for":[43,68,75,113,173,269],"specific":[44],"involve":[46],"fine-tuning":[47],"based":[49],"on":[50,150,211],"user":[51],"data":[52],"or":[53,89],"employing":[54],"in-context":[55],"learning":[56],"with":[57,282],"examples":[58],"of":[59,94,108,119,125,142,190,198,264],"desired":[60],"inputs":[61],"outputs.":[63],"However,":[64],"they":[65],"pose":[66],"challenges":[67],"practical":[69],"adoption":[70],"due":[71],"the":[73,92,97,105,111,123,166,170,196,262,277],"need":[74],"extensive":[76],"computational":[77,279],"resources,":[78],"high-quality":[79],"data,":[80],"continuous":[82],"maintenance.":[83],"Furthermore,":[84],"neither":[85],"strategy":[86],"can":[87],"explain":[88],"reason":[90],"about":[91],"deficiencies":[93],"given":[98],"We":[100,203],"propose":[101],"Calico":[102,126,143,207,233],"address":[104],"high":[106,278],"cost":[107],"fine-tuning,":[109],"eliminate":[110],"necessity":[112],"task-specific":[114],"examples,":[115],"provide":[117],"explanations":[118],"LLM":[120,268],"deficiency.":[121],"At":[122],"heart":[124],"is":[127,144],"an":[128],"evolutionary":[129],"approach":[130],"that":[131,222],"interleaves":[132],"knowledge":[133,152,168,192,231],"calibration":[134],"AI":[136],"deficiency":[137],"diagnosis.":[138],"The":[139],"key":[140],"essence":[141],"follows.":[146],"First,":[147],"it":[148,159,178,235],"focuses":[149],"identifying":[151],"gaps":[153],"LLMs\u2019":[155],"comprehension.":[157],"Second,":[158],"conducts":[160],"automated":[161,270],"code":[162,172,201,216,228],"refactoring":[163],"integrate":[165],"overlooked":[167,191],"into":[169],"source":[171],"mitigating":[174],"those":[175],"gaps.":[176],"Third,":[177],"employs":[179],"what-if":[180],"analysis":[181],"counterfactual":[183],"reasoning":[184],"determine":[186],"a":[187,266,283],"minimum":[188],"set":[189],"necessary":[193],"extensively":[205],"evaluated":[206],"over":[208],"8,938":[209],"programs":[210],"three":[212],"most":[213],"commonly":[214],"seen":[215],"Our":[218],"experimental":[219],"results":[220,260],"show":[221],"vanilla":[223,267],"ChatGPT":[224],"cannot":[225],"fully":[226],"understand":[227],"structures.":[229],"With":[230],"calibration,":[232],"improves":[234],"by":[236],"20%":[237],"exhibits":[239],"comparable":[240],"proficiency":[241],"compared":[242],"fine-tuned":[244,284],"LLMs.":[245],"Deficiency":[246],"diagnosis":[247],"contributes":[248],"8%":[250],"reduction":[251],"sizes":[254],"while":[255],"ensuring":[256],"performance.":[257],"These":[258],"impressive":[259],"demonstrate":[261],"feasibility":[263],"utilizing":[265],"software":[271],"engineering":[272],"(SE)":[273],"tasks,":[274],"thereby":[275],"avoiding":[276],"costs":[280],"associated":[281],"model.":[285]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
