{"id":"https://openalex.org/W7128785481","doi":"https://doi.org/10.1109/tse.2026.3664287","title":"Toward Automated Validation of Language Model Synthesized Test Cases Using Semantic Entropy","display_name":"Toward Automated Validation of Language Model Synthesized Test Cases Using Semantic Entropy","publication_year":2026,"publication_date":"2026-02-13","ids":{"openalex":"https://openalex.org/W7128785481","doi":"https://doi.org/10.1109/tse.2026.3664287"},"language":null,"primary_location":{"id":"doi:10.1109/tse.2026.3664287","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tse.2026.3664287","pdf_url":null,"source":{"id":"https://openalex.org/S8351582","display_name":"IEEE Transactions on Software Engineering","issn_l":"0098-5589","issn":["0098-5589","1939-3520","2326-3881"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041705313","display_name":"Hamed Taherkhani","orcid":null},"institutions":[{"id":"https://openalex.org/I192455969","display_name":"York University","ror":"https://ror.org/05fq50484","country_code":"CA","type":"education","lineage":["https://openalex.org/I192455969"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Hamed Taherkhani","raw_affiliation_strings":["Lassonde School of Engineering, York University, Toronto, ON, Canada"],"raw_orcid":"https://orcid.org/0009-0004-0897-4800","affiliations":[{"raw_affiliation_string":"Lassonde School of Engineering, York University, Toronto, ON, Canada","institution_ids":["https://openalex.org/I192455969"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiho Shin","orcid":"https://orcid.org/0000-0001-8829-3773"},"institutions":[{"id":"https://openalex.org/I192455969","display_name":"York University","ror":"https://ror.org/05fq50484","country_code":"CA","type":"education","lineage":["https://openalex.org/I192455969"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jiho Shin","raw_affiliation_strings":["Lassonde School of Engineering, York University, Toronto, ON, Canada"],"raw_orcid":"https://orcid.org/0000-0001-8829-3773","affiliations":[{"raw_affiliation_string":"Lassonde School of Engineering, York University, Toronto, ON, Canada","institution_ids":["https://openalex.org/I192455969"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125931417","display_name":"Muhammad Ammar Tahir","orcid":null},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Muhammad Ammar Tahir","raw_affiliation_strings":["University of Washington, Seattle, WA, USA"],"raw_orcid":"https://orcid.org/0009-0006-7683-2614","affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007920485","display_name":"Md Rakib Hossain Misu","orcid":"https://orcid.org/0000-0002-7931-6782"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Md Rakib Hossain Misu","raw_affiliation_strings":["University of California, Irvine, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-7931-6782","affiliations":[{"raw_affiliation_string":"University of California, Irvine, CA, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107936794","display_name":"Vineet Sunil Gattani","orcid":null},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vineet Sunil Gattani","raw_affiliation_strings":["Arizona State University, Tempe, AZ, USA"],"raw_orcid":"https://orcid.org/0009-0007-8262-0105","affiliations":[{"raw_affiliation_string":"Arizona State University, Tempe, AZ, USA","institution_ids":["https://openalex.org/I55732556"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091120726","display_name":"Hadi Hemmati","orcid":null},"institutions":[{"id":"https://openalex.org/I192455969","display_name":"York University","ror":"https://ror.org/05fq50484","country_code":"CA","type":"education","lineage":["https://openalex.org/I192455969"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Hadi Hemmati","raw_affiliation_strings":["Lassonde School of Engineering, York University, Toronto, ON, Canada"],"raw_orcid":"https://orcid.org/0000-0003-0204-9812","affiliations":[{"raw_affiliation_string":"Lassonde School of Engineering, York University, Toronto, ON, Canada","institution_ids":["https://openalex.org/I192455969"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17608977,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"52","issue":"4","first_page":"1426","last_page":"1445"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.7021999955177307,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.7021999955177307,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.08100000023841858,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.0348999984562397,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/test-case","display_name":"Test case","score":0.6223000288009644},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.5127999782562256},{"id":"https://openalex.org/keywords/code-coverage","display_name":"Code coverage","score":0.46459999680519104},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.40610000491142273},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.3871999979019165},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3562999963760376},{"id":"https://openalex.org/keywords/fuzz-testing","display_name":"Fuzz testing","score":0.3529999852180481},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.3476000130176544},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3375000059604645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8828999996185303},{"id":"https://openalex.org/C128942645","wikidata":"https://www.wikidata.org/wiki/Q1568346","display_name":"Test case","level":3,"score":0.6223000288009644},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5421000123023987},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5386999845504761},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.5127999782562256},{"id":"https://openalex.org/C53942775","wikidata":"https://www.wikidata.org/wiki/Q1211721","display_name":"Code coverage","level":3,"score":0.46459999680519104},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.40610000491142273},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4032000005245209},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40230000019073486},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.3871999979019165},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3562999963760376},{"id":"https://openalex.org/C111065885","wikidata":"https://www.wikidata.org/wiki/Q1189053","display_name":"Fuzz testing","level":3,"score":0.3529999852180481},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3476000130176544},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3375000059604645},{"id":"https://openalex.org/C165825675","wikidata":"https://www.wikidata.org/wiki/Q1399743","display_name":"Model-based testing","level":4,"score":0.3264000117778778},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.32499998807907104},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3190999925136566},{"id":"https://openalex.org/C80519477","wikidata":"https://www.wikidata.org/wiki/Q3532236","display_name":"Scenario testing","level":3,"score":0.31040000915527344},{"id":"https://openalex.org/C37926939","wikidata":"https://www.wikidata.org/wiki/Q7449061","display_name":"Semantic equivalence","level":4,"score":0.29820001125335693},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2953000068664551},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.27900001406669617},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.26350000500679016},{"id":"https://openalex.org/C90697598","wikidata":"https://www.wikidata.org/wiki/Q3657183","display_name":"Objective test","level":3,"score":0.26269999146461487},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C7435765","wikidata":"https://www.wikidata.org/wiki/Q7705776","display_name":"Test Management Approach","level":5,"score":0.2603999972343445},{"id":"https://openalex.org/C169168650","wikidata":"https://www.wikidata.org/wiki/Q1675637","display_name":"Keyword-driven testing","level":5,"score":0.2515000104904175}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tse.2026.3664287","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tse.2026.3664287","pdf_url":null,"source":{"id":"https://openalex.org/S8351582","display_name":"IEEE Transactions on Software Engineering","issn_l":"0098-5589","issn":["0098-5589","1939-3520","2326-3881"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Software Engineering","raw_type":"journal-article"},{"id":"pmh:doi:10.48550/arxiv.2411.08254","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2411.08254","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Modern":[0],"Large":[1],"Language":[2],"Model":[3],"(LLM)-based":[4],"programming":[5],"agents":[6,44],"often":[7],"rely":[8],"on":[9,107],"test":[10,32,66,77,94,104,122,160,173],"execution":[11],"feedback":[12,37],"to":[13,63,92,126,154],"refine":[14],"their":[15],"generated":[16,22,68],"code.":[17,49],"These":[18],"tests":[19],"are":[20],"synthetically":[21],"by":[23,69,124,136],"LLMs.":[24,70],"However,":[25],"LLMs":[26,113],"may":[27],"produce":[28],"invalid":[29,99,103,159],"or":[30,98],"hallucinated":[31],"cases,":[33],"which":[34],"can":[35],"mislead":[36],"loops":[38],"and":[39,47,79,100,111,158,162,179],"degrade":[40],"the":[41,73,169],"performance":[42],"of":[43,76,171],"in":[45,139,176],"refining":[46],"improving":[48,168],"This":[50],"paper":[51],"introduces":[52],"<monospace":[53,84,116],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[54,85,117],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">VALTEST</monospace>,":[55],"a":[56,88,151,164],"novel":[57],"framework":[58],"that":[59,115,147],"leverages":[60],"semantic":[61,74,148],"entropy":[62,149],"automatically":[64],"validate":[65],"cases":[67,78,95,161,174],"By":[71],"analyzing":[72],"structure":[75],"computing":[80],"entropy-based":[81],"uncertainty":[82],"measures,":[83],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">VALTEST</monospace>":[86,118],"trains":[87],"machine":[89],"learning":[90],"model":[91],"classify":[93],"as":[96,134],"valid":[97,157],"filters":[101],"out":[102],"cases.":[105],"Experiments":[106],"multiple":[108],"benchmark":[109],"datasets":[110],"various":[112],"show":[114],"not":[119],"only":[120],"boosts":[121],"validity":[123],"up":[125],"29%":[127],"but":[128],"also":[129,145],"improves":[130],"code":[131,180],"generation":[132],"performance,":[133],"evidenced":[135],"significant":[137],"increases":[138],"pass@1":[140],"scores.":[141],"Our":[142],"extensive":[143],"experiments":[144],"reveal":[146],"is":[150],"reliable":[152],"indicator":[153],"distinguish":[155],"between":[156],"provides":[163],"robust":[165],"solution":[166],"for":[167],"correctness":[170],"LLM-generated":[172],"used":[175],"software":[177],"testing":[178],"generation.":[181]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-14T00:00:00"}
