{"id":"https://openalex.org/W7116416084","doi":"https://doi.org/10.1109/access.2025.3646270","title":"Metamorphic Testing for Semantic Invariance in Large Language Models","display_name":"Metamorphic Testing for Semantic Invariance in Large Language Models","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W7116416084","doi":"https://doi.org/10.1109/access.2025.3646270"},"language":null,"primary_location":{"id":"doi:10.1109/access.2025.3646270","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3646270","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3646270","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002158202","display_name":"J. de Curt\u00f2","orcid":"https://orcid.org/0000-0002-8334-4719"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"J. De Curt\u00f2","raw_affiliation_strings":["Department of Computer Applications in Science and Engineering, Barcelona Supercomputing Center, Barcelona, Spain"],"raw_orcid":"https://orcid.org/0000-0002-8334-4719","affiliations":[{"raw_affiliation_string":"Department of Computer Applications in Science and Engineering, Barcelona Supercomputing Center, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848","https://openalex.org/I2799803557"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5120903819","display_name":"I. De Zarz\u00e0","orcid":null},"institutions":[{"id":"https://openalex.org/I4210112527","display_name":"Luxembourg Institute of Science and Technology","ror":"https://ror.org/01t178j62","country_code":"LU","type":"nonprofit","lineage":["https://openalex.org/I4210112527"]}],"countries":["LU"],"is_corresponding":false,"raw_author_name":"I. De Zarz\u00e0","raw_affiliation_strings":["Human Centered AI, Data and Software, Luxembourg Institute of Science and Technology, Esch-sur-Alzette, Luxembourg"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Human Centered AI, Data and Software, Luxembourg Institute of Science and Technology, Esch-sur-Alzette, Luxembourg","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5002158202"],"corresponding_institution_ids":["https://openalex.org/I2799803557","https://openalex.org/I9617848"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":4.1552,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.95209764,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"13","issue":null,"first_page":"214772","last_page":"214791"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.219200000166893,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.219200000166893,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.1378999948501587,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.11550000309944153,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/paraphrase","display_name":"Paraphrase","score":0.5795000195503235},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5778999924659729},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4375},{"id":"https://openalex.org/keywords/automated-reasoning","display_name":"Automated reasoning","score":0.40560001134872437},{"id":"https://openalex.org/keywords/commonsense-reasoning","display_name":"Commonsense reasoning","score":0.35429999232292175},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.35190001130104065},{"id":"https://openalex.org/keywords/deductive-reasoning","display_name":"Deductive reasoning","score":0.3321000039577484}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6780999898910522},{"id":"https://openalex.org/C2780922921","wikidata":"https://www.wikidata.org/wiki/Q255189","display_name":"Paraphrase","level":2,"score":0.5795000195503235},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5778999924659729},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5778999924659729},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5412999987602234},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4375},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.40560001134872437},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.35429999232292175},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.35190001130104065},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.3321000039577484},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3183000087738037},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.3156999945640564},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.29019999504089355},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2888999879360199},{"id":"https://openalex.org/C136172866","wikidata":"https://www.wikidata.org/wiki/Q1088088","display_name":"Possible world","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.2687000036239624},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C43971567","wikidata":"https://www.wikidata.org/wiki/Q3142865","display_name":"Logical reasoning","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/access.2025.3646270","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3646270","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3646270","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3646270","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320316401","display_name":"Luxembourg Institute of Science and Technology","ror":"https://ror.org/01t178j62"},{"id":"https://openalex.org/F4320323868","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96"},{"id":"https://openalex.org/F4320335598","display_name":"Agencia Estatal de Investigaci\u00f3n","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1840435438","https://openalex.org/W2041282815","https://openalex.org/W2041650849","https://openalex.org/W2101105183","https://openalex.org/W2782311202","https://openalex.org/W2859484040","https://openalex.org/W2888307014","https://openalex.org/W2901401834","https://openalex.org/W2923014074","https://openalex.org/W2963123047","https://openalex.org/W2963327228","https://openalex.org/W2970641574","https://openalex.org/W3003257820","https://openalex.org/W3035507081","https://openalex.org/W3035671939","https://openalex.org/W3156636935","https://openalex.org/W3175362188","https://openalex.org/W3194983542","https://openalex.org/W3202712981","https://openalex.org/W4323655724","https://openalex.org/W4384561707","https://openalex.org/W4385572001","https://openalex.org/W4389727268","https://openalex.org/W4402670901","https://openalex.org/W4404515031","https://openalex.org/W4406892603","https://openalex.org/W4408426533"],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"demonstrated":[5],"remarkable":[6],"capabilities":[7],"in":[8,42,119,168,192],"mathematical":[9],"and":[10,63,76,91,115,140,201],"scientific":[11],"reasoning":[12,25,79,153,166,194],"tasks,":[13],"yet":[14,105],"standard":[15],"benchmark":[16],"evaluations":[17],"fail":[18],"to":[19,113,159,182,205],"capture":[20],"the":[21],"stability":[22],"of":[23,71,137,210],"these":[24],"processes":[26],"under":[27],"semantically":[28],"equivalent":[29],"input":[30],"variations.":[31],"We":[32,66,196],"introduce":[33],"a":[34,68],"metamorphic":[35,51,176],"testing":[36,177],"framework":[37,200],"for":[38,131,189],"assessing":[39],"semantic":[40],"invariance":[41,102],"LLM":[43],"reasoning,":[44],"systematically":[45],"evaluating":[46],"model":[47],"responses":[48],"across":[49],"eight":[50,82],"relations:":[52],"identity,":[53],"paraphrase,":[54],"fact":[55],"reordering,":[56],"expansion,":[57],"contraction,":[58],"academic":[59],"context,":[60,62],"business":[61],"contrastive":[64],"formulation.":[65],"conduct":[67],"comparative":[69],"analysis":[70,145],"two":[72],"recent":[73],"foundation":[74,211],"models\u2014Hermes-4-70B":[75],"DeepSeek-R1-0528\u2014across":[77],"79":[78],"problems":[80],"spanning":[81],"categories":[83],"(Physics,":[84],"Mathematics,":[85],"Chemistry,":[86],"Economics,":[87],"Statistics,":[88],"Biology,":[89],"Calculus,":[90],"Optimization)":[92],"at":[93],"three":[94],"difficulty":[95],"levels.":[96],"Both":[97],"models":[98],"exhibit":[99],"similar":[100],"aggregate":[101],"scores":[103],"(~0.90),":[104],"manifest":[106],"distinct":[107],"vulnerability":[108],"profiles\u2013DeepSeek":[109],"shows":[110],"notable":[111],"sensitivity":[112],"paraphrase":[114],"fact-reordering":[116],"transformations,":[117],"particularly":[118,129],"formal":[120],"domains":[121],"(Calculus:":[122],"-0.104,":[123],"Statistics:":[124],"-0.048).":[125],"Contrastive":[126],"transformations":[127],"prove":[128],"challenging":[130],"Hermes,":[132],"inducing":[133],"mean":[134],"score":[135],"deltas":[136],"-0.147":[138],"(Hermes)":[139],"-0.005":[141],"(DeepSeek).":[142],"Semantic":[143],"similarity":[144],"further":[146],"reveals":[147],"that":[148,175],"Hermes":[149],"maintains":[150],"more":[151],"coherent":[152],"traces":[154],"(mean":[155],"similarity:":[156],"0.87)":[157],"compared":[158],"DeepSeek":[160,163],"(0.78),":[161],"with":[162],"exhibiting":[164],"near-complete":[165],"breakdown":[167],"specific":[169],"category-transformation":[170],"combinations.":[171],"These":[172],"findings":[173],"demonstrate":[174],"uncovers":[178],"robustness":[179,208],"characteristics":[180],"invisible":[181],"conventional":[183],"accuracy":[184],"metrics,":[185],"providing":[186],"essential":[187],"insights":[188],"deploying":[190],"LLMs":[191],"high-stakes":[193],"applications.":[195],"release":[197],"our":[198],"evaluation":[199],"complete":[202],"experimental":[203],"data":[204],"facilitate":[206],"reproducible":[207],"assessment":[209],"models.":[212]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-12-21T00:00:00"}
