{"id":"https://openalex.org/W7135224249","doi":"https://doi.org/10.48550/arxiv.2603.12117","title":"SommBench: Assessing Sommelier Expertise of Language Models","display_name":"SommBench: Assessing Sommelier Expertise of Language Models","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7135224249","doi":"https://doi.org/10.48550/arxiv.2603.12117"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.12117","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12117","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.12117","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128946252","display_name":"William Brach","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Brach, William","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128940851","display_name":"Tomas Bedej","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bedej, Tomas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103791189","display_name":"Jacob Holm Nielsen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nielsen, Jacob","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128992541","display_name":"Jacob Pichna","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pichna, Jacob","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128934707","display_name":"Juraj Bedej","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bedej, Juraj","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128960328","display_name":"Eemeli Saarensilta","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saarensilta, Eemeli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028928111","display_name":"Julie Dupouy","orcid":"https://orcid.org/0000-0001-7801-286X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dupouy, Julie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129049209","display_name":"Gianluca Barmina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Barmina, Gianluca","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128925133","display_name":"Andrea Blasi N\u00fa\u00f1ez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"N\u00fa\u00f1ez, Andrea Blasi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128990441","display_name":"Peter Schneider-Kamp","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schneider-Kamp, Peter","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129070443","display_name":"Kristian Ko\u0161\u0165\u00e1l","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ko\u0161\u0165\u00e1l, Kristian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088100575","display_name":"Michal Ries","orcid":"https://orcid.org/0000-0002-9233-7123"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ries, Michal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128950864","display_name":"Lukas Galke Poech","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Poech, Lukas Galke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5128946252"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10971","display_name":"Olfactory and Sensory Function Studies","score":0.3662000000476837,"subfield":{"id":"https://openalex.org/subfields/2809","display_name":"Sensory Systems"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10971","display_name":"Olfactory and Sensory Function Studies","score":0.3662000000476837,"subfield":{"id":"https://openalex.org/subfields/2809","display_name":"Sensory Systems"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12032","display_name":"Multisensory perception and integration","score":0.12060000002384186,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10866","display_name":"Nutritional Studies and Diet","score":0.08799999952316284,"subfield":{"id":"https://openalex.org/subfields/2739","display_name":"Public Health, Environmental and Occupational Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6653000116348267},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.47600001096725464},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4668999910354614},{"id":"https://openalex.org/keywords/wine","display_name":"Wine","score":0.43779999017715454},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.399399995803833},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3799000084400177}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7233999967575073},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6653000116348267},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.567300021648407},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5508999824523926},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.47600001096725464},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4668999910354614},{"id":"https://openalex.org/C55952523","wikidata":"https://www.wikidata.org/wiki/Q3014419","display_name":"Wine","level":2,"score":0.43779999017715454},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.399399995803833},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3799000084400177},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.32100000977516174},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.31529998779296875},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.30090001225471497},{"id":"https://openalex.org/C14103023","wikidata":"https://www.wikidata.org/wiki/Q11681459","display_name":"Pairing","level":3,"score":0.27799999713897705},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.26989999413490295},{"id":"https://openalex.org/C2778883600","wikidata":"https://www.wikidata.org/wiki/Q2390977","display_name":"Language proficiency","level":2,"score":0.26919999718666077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.12117","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12117","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.12117","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12117","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7067357301712036,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,53,144,167,192,245],"rapid":[2],"advances":[3],"of":[4,55,143,248],"large":[5],"language":[6,60,120,126,170,249],"models,":[7,171,181],"it":[8],"becomes":[9],"increasingly":[10],"important":[11],"to":[12,44,78,204,229],"systematically":[13],"evaluate":[14],"their":[15],"multilingual":[16,42],"and":[17,57,97,114,140,158,179,185,217,225,240],"multicultural":[18],"capabilities.":[19],"Previous":[20],"cultural":[21,28],"evaluation":[22],"benchmarks":[23],"focus":[24],"mainly":[25],"on":[26,198],"basic":[27],"knowledge":[29],"that":[30,191],"can":[31],"be":[32,230],"encoded":[33],"in":[34,52,104,133,148],"linguistic":[35],"form.":[36],"Here,":[37],"we":[38],"propose":[39],"SommBench,":[40],"a":[41,48,119,137,208],"benchmark":[43,129,242,252],"assess":[45],"sommelier":[46,139,246],"expertise,":[47],"domain":[49],"deeply":[50],"grounded":[51],"senses":[54],"smell":[56],"taste.":[58],"While":[59],"models":[61,174,195],"learn":[62],"about":[63],"sensory":[64,81],"properties":[65],"exclusively":[66],"through":[67],"textual":[68,74],"descriptions,":[69],"SommBench":[70,83,101,236],"tests":[71],"whether":[72],"this":[73],"grounding":[75],"is":[76,102,253],"sufficient":[77],"emulate":[79],"expert-level":[80],"judgment.":[82],"comprises":[84],"three":[85],"main":[86],"tasks:":[87],"Wine":[88,93],"Theory":[89],"Question":[90],"Answering":[91],"(WTQA),":[92],"Feature":[94],"Completion":[95],"(WFC),":[96],"Food-Wine":[98],"Pairing":[99],"(FWP).":[100],"available":[103,255],"multiple":[105],"languages:":[106],"English,":[107],"Slovak,":[108],"Swedish,":[109],"Finnish,":[110],"German,":[111],"Danish,":[112],"Italian,":[113],"Spanish.":[115],"This":[116],"helps":[117],"separate":[118],"model's":[121],"wine":[122,150,155,199],"expertise":[123,247],"from":[124],"its":[125],"skills.":[127],"The":[128,251],"datasets":[130],"were":[131],"developed":[132],"close":[134],"collaboration":[135],"with":[136,207],"professional":[138],"native":[141],"speakers":[142],"respective":[145],"languages,":[146],"resulting":[147],"1,024":[149],"theory":[151,200],"question-answering":[152],"questions,":[153],"1,000":[154,159],"feature-completion":[156],"examples,":[157],"food-wine":[160,218],"pairing":[161,219],"examples.":[162],"We":[163],"provide":[164],"results":[165,189,234],"for":[166,243],"most":[168,193],"popular":[169],"including":[172],"closed-weights":[173,209],"such":[175,182],"as":[176,183,237],"Gemini":[177],"2.5,":[178],"open-weights":[180],"GPT-OSS":[184],"Qwen":[186],"3.":[187],"Our":[188],"show":[190,220],"capable":[194],"perform":[196],"well":[197],"question":[201],"answering":[202],"(up":[203],"97%":[205],"correct":[206],"model),":[210],"yet":[211],"feature":[212],"completion":[213],"(peaking":[214],"at":[215,256],"65%)":[216],"(MCC":[221],"ranging":[222],"between":[223],"0":[224],"0.39)":[226],"turn":[227],"out":[228],"more":[231],"challenging.":[232],"These":[233],"position":[235],"an":[238],"interesting":[239],"challenging":[241],"evaluating":[244],"models.":[250],"publicly":[254],"https://github.com/sommify/sommbench.":[257]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-14T00:00:00"}
