{"id":"https://openalex.org/W4280605341","doi":"https://doi.org/10.1145/3533767.3534394","title":"AEON: a method for automatic evaluation of NLP test cases","display_name":"AEON: a method for automatic evaluation of NLP test cases","publication_year":2022,"publication_date":"2022-07-15","ids":{"openalex":"https://openalex.org/W4280605341","doi":"https://doi.org/10.1145/3533767.3534394"},"language":"en","primary_location":{"id":"doi:10.1145/3533767.3534394","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3533767.3534394","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGSOFT International Symposium on Software Testing and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047584018","display_name":"Jen-tse Huang","orcid":"https://orcid.org/0000-0003-3446-0083"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jen-tse Huang","raw_affiliation_strings":["Chinese University of Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100390066","display_name":"Jianping Zhang","orcid":"https://orcid.org/0000-0001-7141-9464"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianping Zhang","raw_affiliation_strings":["Chinese University of Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100755181","display_name":"Wenxuan Wang","orcid":"https://orcid.org/0000-0001-7172-8662"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenxuan Wang","raw_affiliation_strings":["Chinese University of Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101941950","display_name":"Pinjia He","orcid":"https://orcid.org/0000-0003-3377-8129"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pinjia He","raw_affiliation_strings":["Chinese University of Hong Kong at Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong at Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101500798","display_name":"Yuxin Su","orcid":"https://orcid.org/0000-0002-3338-8561"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxin Su","raw_affiliation_strings":["Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069596903","display_name":"Michael R. Lyu","orcid":"https://orcid.org/0000-0002-3666-5798"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Michael R. Lyu","raw_affiliation_strings":["Chinese University of Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, China","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5047584018"],"corresponding_institution_ids":["https://openalex.org/I177725633"],"apc_list":null,"apc_paid":null,"fwci":5.1535,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.95704504,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"202","last_page":"214"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7202996611595154},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7079324722290039},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.632285475730896},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.5602587461471558}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7202996611595154},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7079324722290039},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.632285475730896},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.5602587461471558},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3533767.3534394","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3533767.3534394","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGSOFT International Symposium on Software Testing and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1647671624","https://openalex.org/W2058373514","https://openalex.org/W2081580037","https://openalex.org/W2148815373","https://openalex.org/W2152195021","https://openalex.org/W2163455955","https://openalex.org/W2516621648","https://openalex.org/W2560674852","https://openalex.org/W2607219512","https://openalex.org/W2609368435","https://openalex.org/W2616028256","https://openalex.org/W2741609678","https://openalex.org/W2799640043","https://openalex.org/W2804337238","https://openalex.org/W2806718802","https://openalex.org/W2888307014","https://openalex.org/W2898868990","https://openalex.org/W2919275752","https://openalex.org/W2922293812","https://openalex.org/W2947133760","https://openalex.org/W2949128310","https://openalex.org/W2954629067","https://openalex.org/W2954855426","https://openalex.org/W2954903132","https://openalex.org/W2957066083","https://openalex.org/W2957905354","https://openalex.org/W2962784628","https://openalex.org/W2962818872","https://openalex.org/W2963327228","https://openalex.org/W2963918774","https://openalex.org/W2964150020","https://openalex.org/W2964164993","https://openalex.org/W2968940383","https://openalex.org/W2970078867","https://openalex.org/W2970641574","https://openalex.org/W2988194011","https://openalex.org/W3000442532","https://openalex.org/W3007157104","https://openalex.org/W3015001695","https://openalex.org/W3035441470","https://openalex.org/W3035507081","https://openalex.org/W3035736465","https://openalex.org/W3090988182","https://openalex.org/W3091407209","https://openalex.org/W3091896612","https://openalex.org/W3101094684","https://openalex.org/W3102720581","https://openalex.org/W3104208618","https://openalex.org/W3104423855","https://openalex.org/W3104570147","https://openalex.org/W3105604018","https://openalex.org/W3156636935","https://openalex.org/W3163644064","https://openalex.org/W3169948074","https://openalex.org/W3194983542","https://openalex.org/W4210497109","https://openalex.org/W4365799834","https://openalex.org/W6600424091"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Due":[0],"to":[1,17,69,82,168,249],"the":[2,19,44,56,64,99,104,192,201,212,223,233],"labor-intensive":[3],"nature":[4],"of":[5,21,63,98,121,227],"manual":[6,116],"test":[7,34,66,90,100,112,148,153,170,209,230,243],"oracle":[8],"construction,":[9],"various":[10],"automated":[11],"testing":[12,176],"techniques":[13,30,177],"have":[14],"been":[15],"proposed":[16],"enhance":[18],"reliability":[20],"Natural":[22],"Language":[23],"Processing":[24],"(NLP)":[25],"software.":[26,263],"In":[27,197,218],"theory,":[28],"these":[29],"mutate":[31],"an":[32,48],"existing":[33],"case":[35],"(e.g.,":[36,77],"a":[37,83],"sentence":[38],"with":[39,194,242],"its":[40],"label)":[41],"and":[42,54,74,88,119,162,255],"assume":[43],"generated":[45,65,102,152,172],"one":[46],"preserves":[47],"equivalent":[49],"or":[50],"similar":[51,71],"semantic":[52,72,160,207],"meaning":[53,73],"thus,":[55],"same":[57],"label.":[58],"However,":[59],"in":[60,133,205,260],"practice,":[61],"many":[62],"cases":[67,101,113,171,244],"fail":[68],"preserve":[70],"are":[75,108,252],"unnatural":[76,89,229],"grammar":[78],"errors),":[79],"which":[80],"leads":[81,248],"high":[84],"false":[85,109],"alarm":[86],"rate":[87],"cases.":[91,149],"Our":[92],"evaluation":[93],"study":[94],"finds":[95],"that":[96,189,251],"44%":[97],"by":[103,173,216,235,246],"state-of-the-art":[105],"(SOTA)":[106],"approaches":[107],"alarms.":[110],"These":[111],"require":[114],"extensive":[115],"checking":[117],"effort,":[118],"instead":[120],"improving":[122,261],"NLP":[123,129,147,184,262],"software,":[124],"they":[125],"can":[126],"even":[127],"degrade":[128],"software":[130],"when":[131],"utilized":[132],"model":[134,240],"training.":[135],"To":[136],"address":[137],"this":[138],"problem,":[139],"we":[140],"propose":[141],"AEON":[142,167,190,199,220,247],"for":[143],"Automatic":[144],"Evaluation":[145],"Of":[146],"For":[150],"each":[151],"case,":[154],"it":[155],"outputs":[156],"scores":[157],"based":[158],"on":[159,178],"similarity":[161],"language":[163],"naturalness.":[164],"We":[165],"employ":[166],"evaluate":[169],"four":[174],"popular":[175],"five":[179],"datasets":[180],"across":[181],"three":[182],"typical":[183],"tasks.":[185],"The":[186],"results":[187],"show":[188],"aligns":[191],"best":[193,202,213],"human":[195],"judgment.":[196],"particular,":[198],"achieves":[200],"average":[203,225],"precision":[204,226],"detecting":[206],"inconsistent":[208],"cases,":[210,231],"outperforming":[211],"baseline":[214],"metric":[215],"10%.":[217],"addition,":[219],"also":[221],"has":[222],"highest":[224],"finding":[228],"surpassing":[232],"baselines":[234],"more":[236,253],"than":[237],"15%.":[238],"Moreover,":[239],"training":[241],"prioritized":[245],"models":[250],"accurate":[254],"robust,":[256],"demonstrating":[257],"AEON\u2019s":[258],"potential":[259]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
