{"id":"https://openalex.org/W7135172765","doi":"https://doi.org/10.48550/arxiv.2603.11687","title":"SemBench: A Universal Semantic Framework for LLM Evaluation","display_name":"SemBench: A Universal Semantic Framework for LLM Evaluation","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7135172765","doi":"https://doi.org/10.48550/arxiv.2603.11687"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.11687","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11687","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.11687","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128923033","display_name":"Mikel Zubillaga","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zubillaga, Mikel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054410343","display_name":"Naiara Mart\u00ednez P\u00e9rez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Perez, Naiara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128968560","display_name":"Oscar Sainz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sainz, Oscar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5121401188","display_name":"German Rigau","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rigau, German","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.28459998965263367,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.28459998965263367,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.22269999980926514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.11159999668598175,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6172999739646912},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.4839000105857849},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.40700000524520874},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.39899998903274536},{"id":"https://openalex.org/keywords/competence","display_name":"Competence (human resources)","score":0.3984000086784363},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3926999866962433},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.3869999945163727},{"id":"https://openalex.org/keywords/semantic-analysis","display_name":"Semantic analysis (machine learning)","score":0.31279999017715454}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7746000289916992},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6330999732017517},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6172999739646912},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5935999751091003},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.4839000105857849},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.40700000524520874},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.39899998903274536},{"id":"https://openalex.org/C100521375","wikidata":"https://www.wikidata.org/wiki/Q2015382","display_name":"Competence (human resources)","level":2,"score":0.3984000086784363},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3926999866962433},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3869999945163727},{"id":"https://openalex.org/C2777946921","wikidata":"https://www.wikidata.org/wiki/Q7449044","display_name":"Semantic analysis (machine learning)","level":2,"score":0.31279999017715454},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.29249998927116394},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.29019999504089355},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.2816999852657318},{"id":"https://openalex.org/C17601614","wikidata":"https://www.wikidata.org/wiki/Q1346205","display_name":"Linguistic competence","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C85407183","wikidata":"https://www.wikidata.org/wiki/Q1045785","display_name":"Semantic network","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C198942812","wikidata":"https://www.wikidata.org/wiki/Q496618","display_name":"Semantic property","level":2,"score":0.25220000743865967}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.11687","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11687","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.11687","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11687","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"progress":[1],"in":[2,109,180],"Natural":[3],"Language":[4,15],"Processing":[5],"(NLP)":[6],"has":[7],"been":[8],"driven":[9],"by":[10],"the":[11,30,77,94],"emergence":[12],"of":[13,34,80,119,127,155,177],"Large":[14],"Models":[16],"(LLMs),":[17],"which":[18],"exhibit":[19],"remarkable":[20],"generative":[21],"and":[22,56,87,104,114,122,162,171],"reasoning":[23],"capabilities.":[24],"However,":[25],"despite":[26],"their":[27,52],"success,":[28],"evaluating":[29],"true":[31],"semantic":[32,78,178],"understanding":[33,179],"these":[35],"models":[36],"remains":[37],"a":[38,68,88,124,152,168],"persistent":[39],"challenge.":[40],"Traditional":[41],"benchmarks":[42,74],"such":[43],"as":[44],"Word-in-Context":[45],"(WiC)":[46],"effectively":[47],"probe":[48],"this":[49,63],"capability,":[50],"but":[51],"creation":[53],"is":[54,157],"resource-intensive":[55],"often":[57],"limited":[58],"to":[59,159],"high-resource":[60],"languages.":[61],"In":[62],"paper,":[64],"we":[65],"introduce":[66],"SemBench,":[67],"framework":[69,173],"for":[70,96,174],"automatically":[71],"generating":[72],"synthetic":[73],"that":[75,132,150],"assess":[76],"competence":[79],"LLMs":[81],"using":[82],"only":[83,151],"dictionary":[84],"sense":[85],"definitions":[86],"sentence":[89],"encoder.":[90],"This":[91],"approach":[92],"eliminates":[93],"need":[95],"curated":[97],"example":[98],"sentences,":[99],"making":[100],"it":[101],"both":[102],"scalable":[103],"language-independent.":[105],"We":[106],"evaluate":[107],"SemBench":[108,136,166],"three":[110],"languages":[111],"(English,":[112],"Spanish,":[113],"Basque)":[115],"spanning":[116],"different":[117],"levels":[118],"linguistic":[120],"resources,":[121],"across":[123],"wide":[125],"range":[126],"LLMs.":[128,181],"Our":[129],"results":[130],"show":[131],"rankings":[133],"derived":[134],"from":[135,142],"strongly":[137],"correlate":[138],"with":[139],"those":[140],"obtained":[141],"standard":[143],"WiC":[144],"datasets.":[145],"Furthermore,":[146],"our":[147],"analysis":[148],"demonstrates":[149],"small":[153],"number":[154],"examples":[156],"required":[158],"achieve":[160],"stable":[161],"meaningful":[163],"rankings.":[164],"Overall,":[165],"provides":[167],"lightweight,":[169],"adaptable,":[170],"data-efficient":[172],"cross-lingual":[175],"evaluation":[176]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-14T00:00:00"}
