{"id":"https://openalex.org/W4412827352","doi":"https://doi.org/10.1145/3744340","title":"AcTracer: Active Testing of Large Language Model via Multi-Stage Sampling","display_name":"AcTracer: Active Testing of Large Language Model via Multi-Stage Sampling","publication_year":2025,"publication_date":"2025-08-01","ids":{"openalex":"https://openalex.org/W4412827352","doi":"https://doi.org/10.1145/3744340"},"language":"en","primary_location":{"id":"doi:10.1145/3744340","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3744340","pdf_url":null,"source":{"id":"https://openalex.org/S142627899","display_name":"ACM Transactions on Software Engineering and Methodology","issn_l":"1049-331X","issn":["1049-331X","1557-7392"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Software Engineering and Methodology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101684275","display_name":"Yuheng Huang","orcid":"https://orcid.org/0000-0003-3666-4020"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yuheng Huang","raw_affiliation_strings":["The University of Tokyo, Tokyo, Japan","The University of Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0003-3666-4020","affiliations":[{"raw_affiliation_string":"The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]},{"raw_affiliation_string":"The University of Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104291087","display_name":"Jiayang Song","orcid":"https://orcid.org/0009-0008-7093-9781"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jiayang Song","raw_affiliation_strings":["Electrical and Computer Engineering, University of Alberta, Edmonton, Alberta, Canada","University of Alberta, Canada"],"raw_orcid":"https://orcid.org/0009-0008-7093-9781","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, University of Alberta, Edmonton, Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]},{"raw_affiliation_string":"University of Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101406450","display_name":"Qiang Hu","orcid":"https://orcid.org/0000-0002-8251-1669"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiang Hu","raw_affiliation_strings":["Tianjin University, Tianjin, China","Tianjin University, China"],"raw_orcid":"https://orcid.org/0000-0002-8251-1669","affiliations":[{"raw_affiliation_string":"Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034751153","display_name":"Felix Juefei-Xu","orcid":"https://orcid.org/0000-0002-0857-8611"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Felix Juefei-Xu","raw_affiliation_strings":["New York University, New York, New York, USA","New York University, USA"],"raw_orcid":"https://orcid.org/0000-0002-0857-8611","affiliations":[{"raw_affiliation_string":"New York University, New York, New York, USA","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"New York University, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101468661","display_name":"Lei Ma","orcid":"https://orcid.org/0000-0002-8621-2420"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]},{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["CA","JP"],"is_corresponding":false,"raw_author_name":"Lei Ma","raw_affiliation_strings":["University of Alberta, Edmonton, Alberta, Canada and The University of Tokyo, Tokyo, Japan","The University of Tokyo, Japan, and University of Alberta, Canada"],"raw_orcid":"https://orcid.org/0000-0002-8621-2420","affiliations":[{"raw_affiliation_string":"University of Alberta, Edmonton, Alberta, Canada and The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I154425047"]},{"raw_affiliation_string":"The University of Tokyo, Japan, and University of Alberta, Canada","institution_ids":["https://openalex.org/I74801974","https://openalex.org/I154425047"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101684275"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08997697,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"35","issue":"3","first_page":"1","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8339713215827942},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.4662818908691406}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8339713215827942},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4662818908691406},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3744340","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3744340","pdf_url":null,"source":{"id":"https://openalex.org/S142627899","display_name":"ACM Transactions on Software Engineering and Methodology","issn_l":"1049-331X","issn":["1049-331X","1557-7392"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Software Engineering and Methodology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1504738275","https://openalex.org/W1996881001","https://openalex.org/W2097749765","https://openalex.org/W2147717514","https://openalex.org/W2616028256","https://openalex.org/W2948254043","https://openalex.org/W2951434086","https://openalex.org/W2954629067","https://openalex.org/W2963339397","https://openalex.org/W2963748441","https://openalex.org/W3023593291","https://openalex.org/W3027484068","https://openalex.org/W3041012898","https://openalex.org/W3042703469","https://openalex.org/W3105347387","https://openalex.org/W3153947101","https://openalex.org/W3161493619","https://openalex.org/W3201174429","https://openalex.org/W4221138767","https://openalex.org/W4250955649","https://openalex.org/W4288076474","https://openalex.org/W4290943938","https://openalex.org/W4318219818","https://openalex.org/W4380353763","https://openalex.org/W4382318449","https://openalex.org/W4384345647","https://openalex.org/W4386566752","https://openalex.org/W4387561538","https://openalex.org/W4388179724","https://openalex.org/W4389518960","https://openalex.org/W4389518997","https://openalex.org/W4392669753","https://openalex.org/W4393335834","https://openalex.org/W4402665833","https://openalex.org/W4402670679","https://openalex.org/W6852874933"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Performance":[0],"evaluation":[1,51,87],"plays":[2],"a":[3,49,108,171,179,204],"crucial":[4],"role":[5],"in":[6,155],"the":[7,17,25,69,86,103,114,126,150,197],"development":[8,151],"lifecycle":[9],"of":[10,27,46,110,130,141,153,174],"large":[11,44],"language":[12],"models":[13],"(LLMs).":[14],"It":[15],"estimates":[16,102],"model\u2019s":[18],"capability,":[19],"elucidates":[20],"behavior":[21],"characteristics,":[22],"and":[23,30,56,81,139,148,190],"facilitates":[24],"identification":[26],"potential":[28],"issues":[29],"limitations,":[31],"thereby":[32],"guiding":[33],"further":[34],"improvement.":[35],"Given":[36],"that":[37,168,213],"LLMs\u2019":[38],"diverse":[39,133],"task-handling":[40],"abilities":[41],"stem":[42],"from":[43,193],"volumes":[45],"training":[47,142],"data,":[48],"comprehensive":[50],"also":[52],"necessitates":[53],"abundant,":[54],"well-annotated,":[55],"representative":[57],"test":[58,73,111,175,198],"data":[59,74,176],"to":[60,88,120,177,195,219],"assess":[61],"LLM":[62],"performance":[63,105,182,217],"across":[64,222],"various":[65,223],"downstream":[66],"tasks.":[67,224],"However,":[68],"demand":[70],"for":[71,166,184],"high-quality":[72],"often":[75],"entails":[76],"substantial":[77],"time,":[78],"computational":[79],"resources,":[80],"manual":[82],"efforts,":[83],"sometimes":[84],"causing":[85],"be":[89,121],"inefficient":[90],"or":[91],"impractical.":[92],"To":[93,144],"address":[94],"these":[95],"challenges,":[96],"researchers":[97],"propose":[98],"active":[99,116,162,207],"testing,":[100],"which":[101],"overall":[104],"by":[106],"selecting":[107],"subset":[109,173],"data.":[112],"Nevertheless,":[113],"existing":[115,220],"testing":[117,163],"methods":[118,221],"tend":[119],"inefficient,":[122],"even":[123],"inapplicable,":[124],"given":[125],"unique":[127],"new":[128],"challenges":[129],"LLMs":[131,167,194],"(e.g.,":[132],"task":[134],"types,":[135],"increased":[136],"model":[137],"complexity,":[138],"unavailability":[140],"data).":[143],"mitigate":[145],"such":[146],"limitations":[147],"expedite":[149],"cycle":[152],"LLMs,":[154],"this":[156],"work,":[157],"we":[158],"introduce":[159],"AcTracer,":[160],"an":[161],"framework":[164],"tailored":[165],"strategically":[169],"selects":[170],"small":[172],"achieve":[178],"more":[180],"accurate":[181],"estimation":[183],"LLMs.":[185],"AcTracer":[186,214],"utilizes":[187],"both":[188],"internal":[189],"external":[191],"information":[192],"guide":[196],"sampling":[199],"process,":[200],"reducing":[201],"variance":[202],"through":[203],"multi-stage":[205],"pool-based":[206],"selection.":[208],"Our":[209],"experiment":[210],"results":[211],"demonstrate":[212],"achieves":[215],"state-of-the-art":[216],"compared":[218]},"counts_by_year":[],"updated_date":"2026-02-14T06:23:00.392402","created_date":"2025-10-10T00:00:00"}
