{"id":"https://openalex.org/W4409495247","doi":"https://doi.org/10.1109/icstw64639.2025.10962520","title":"Evaluating Large Language Model Robustness using Combinatorial Testing","display_name":"Evaluating Large Language Model Robustness using Combinatorial Testing","publication_year":2025,"publication_date":"2025-03-31","ids":{"openalex":"https://openalex.org/W4409495247","doi":"https://doi.org/10.1109/icstw64639.2025.10962520"},"language":"en","primary_location":{"id":"doi:10.1109/icstw64639.2025.10962520","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icstw64639.2025.10962520","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Software Testing, Verification and Validation Workshops (ICSTW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089497572","display_name":"Jaganmohan Chandrasekaran","orcid":"https://orcid.org/0000-0001-8694-4296"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jaganmohan Chandrasekaran","raw_affiliation_strings":["Virginia Tech,Sanghani Center for Artificial Intelligence &#x0026; Data Analytics,Arlington,VA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Virginia Tech,Sanghani Center for Artificial Intelligence &#x0026; Data Analytics,Arlington,VA,USA","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055877602","display_name":"Ankita Ramjibhai Patel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ankita Ramjibhai Patel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054460502","display_name":"Erin Lanus","orcid":"https://orcid.org/0000-0001-8263-0521"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Erin Lanus","raw_affiliation_strings":["Virginia Tech,National Security Institute,Arlington,VA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Virginia Tech,National Security Institute,Arlington,VA,USA","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059050900","display_name":"Laura Freeman","orcid":"https://orcid.org/0000-0001-7108-3921"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Laura J. Freeman","raw_affiliation_strings":["Virginia Tech,National Security Institute,Arlington,VA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Virginia Tech,National Security Institute,Arlington,VA,USA","institution_ids":["https://openalex.org/I859038795"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.2763,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.94912091,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"300","last_page":"309"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.910099983215332,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7234398126602173},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7228406667709351},{"id":"https://openalex.org/keywords/robustness-testing","display_name":"Robustness testing","score":0.5911623239517212},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.4771696627140045},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3288979232311249},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3279150128364563},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.32592564821243286},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.08821123838424683}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7234398126602173},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7228406667709351},{"id":"https://openalex.org/C137726913","wikidata":"https://www.wikidata.org/wiki/Q7353550","display_name":"Robustness testing","level":3,"score":0.5911623239517212},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4771696627140045},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3288979232311249},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3279150128364563},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.32592564821243286},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.08821123838424683},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icstw64639.2025.10962520","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icstw64639.2025.10962520","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Software Testing, Verification and Validation Workshops (ICSTW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306078","display_name":"U.S. Department of Defense","ror":"https://ror.org/0447fe631"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1987220198","https://openalex.org/W2021509247","https://openalex.org/W2075699551","https://openalex.org/W2735780801","https://openalex.org/W3034465864","https://openalex.org/W3166372833","https://openalex.org/W3170449064","https://openalex.org/W4282541642","https://openalex.org/W4282572644","https://openalex.org/W4386810265","https://openalex.org/W4391136507","https://openalex.org/W4400074414","https://openalex.org/W4401043020","https://openalex.org/W4402571406","https://openalex.org/W6782879696","https://openalex.org/W6856121608","https://openalex.org/W6858209431","https://openalex.org/W6863250841","https://openalex.org/W6867973906","https://openalex.org/W6869795791"],"related_works":["https://openalex.org/W1985538490","https://openalex.org/W2385695368","https://openalex.org/W2048346223","https://openalex.org/W1566806263","https://openalex.org/W1980755708","https://openalex.org/W2560201613","https://openalex.org/W172227863","https://openalex.org/W2112647495","https://openalex.org/W1576602683","https://openalex.org/W2013496695"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,11,91,106,146],"large":[3],"language":[4],"models":[5],"(LLMs)":[6],"have":[7,57,80],"demonstrated":[8],"remarkable":[9],"proficiency":[10],"understanding":[12,68],"and":[13,45,47,69,117,171],"generating":[14],"human-like":[15],"text,":[16],"leading":[17],"to":[18,49,65,86],"widespread":[19],"adoption":[20],"across":[21,32,71],"domains.":[22],"Given":[23],"LLM\u2019s":[24,67],"versatile":[25],"capabilities,":[26],"current":[27],"evaluation":[28,63,156],"practices":[29],"assess":[30,66],"LLMs":[31,83],"a":[33,51,60,111,166,179,193],"wide":[34],"variety":[35],"of":[36,89,121,140,150,175,197],"tasks,":[37,93],"including":[38],"answer":[39],"generation,":[40],"sentiment":[41],"analysis,":[42],"text":[43],"completion,":[44],"question":[46],"answers,":[48],"name":[50],"few.":[52],"Multiple":[53],"choice":[54],"questions":[55],"(MCQ)":[56],"emerged":[58],"as":[59],"widely":[61,167],"used":[62,145,168],"task":[64],"reasoning":[70],"various":[72],"subject":[73],"areas.":[74],"However,":[75],"studies":[76],"from":[77],"the":[78,87,103,126,130,138,148,158,173,184],"literature":[79],"revealed":[81],"that":[82],"exhibit":[84],"sensitivity":[85],"ordering":[88],"options":[90],"MCQ":[92,169],"with":[94,192],"performance":[95],"variations":[96],"based":[97],"on":[98],"option":[99],"sequence,":[100],"thus":[101],"underscoring":[102],"robustness":[104,119,149,174,190],"concerns":[105],"LLM":[107],"performance.This":[108],"work":[109],"presents":[110],"combinatorial":[112],"testing-based":[113],"framework":[114,131,185],"for":[115],"systematic":[116],"comprehensive":[118],"assessment":[120],"pre-trained":[122,180],"LLMs.":[123,151],"By":[124],"leveraging":[125],"sequence":[127],"covering":[128],"array,":[129],"constructs":[132],"test":[133],"sets":[134],"by":[135],"systematically":[136],"swapping":[137],"order":[139],"options,":[141],"which":[142],"are":[143],"then":[144],"ascertaining":[147],"We":[152],"performed":[153],"an":[154],"experimental":[155],"using":[157],"Measuring":[159],"Massive":[160],"Multitask":[161],"Language":[162],"Understanding":[163],"(MMLU)":[164],"dataset,":[165],"dataset":[170],"evaluated":[172],"GPT":[176],"3.5":[177],"Turbo,":[178],"LLM.":[181],"Results":[182],"suggest":[183],"can":[186],"effectively":[187],"identify":[188],"numerous":[189],"issues":[191],"relatively":[194],"minimal":[195],"number":[196],"tests.":[198]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
