{"id":"https://openalex.org/W4415536564","doi":"https://doi.org/10.1145/3746027.3758173","title":"Can Audio Language Models Listen Between the Lines? A Study on Metaphorical Reasoning via <scp>Unspoken</scp>","display_name":"Can Audio Language Models Listen Between the Lines? A Study on Metaphorical Reasoning via <scp>Unspoken</scp>","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415536564","doi":"https://doi.org/10.1145/3746027.3758173"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3758173","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3758173","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053229311","display_name":"Hongru Xiao","orcid":"https://orcid.org/0009-0007-6715-357X"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongru Xiao","raw_affiliation_strings":["College of Civil Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"College of Civil Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091948790","display_name":"Xiang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Li","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057801749","display_name":"Dongteng Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duyi Pan","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037309012","display_name":"Longfei Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210166499","display_name":"Henan Polytechnic University","ror":"https://ror.org/05vr1c885","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210166499"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longfei Zhang","raw_affiliation_strings":["Henan Polytechnic University, JiaoZuo, Henan, China"],"affiliations":[{"raw_affiliation_string":"Henan Polytechnic University, JiaoZuo, Henan, China","institution_ids":["https://openalex.org/I4210166499"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120131292","display_name":"ZhixueSong ZhixueSong","orcid":null},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"ZhixueSong ZhixueSong","raw_affiliation_strings":["China University of Mining Technology - Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"China University of Mining Technology - Beijing, Beijing, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011412438","display_name":"Jiale Han","orcid":"https://orcid.org/0000-0001-6477-0424"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jiale Han","raw_affiliation_strings":["The Hong Kong University of Science and Technology, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology, Hong Kong, China","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086484127","display_name":"Songning Lai","orcid":"https://orcid.org/0009-0007-3132-9414"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Songning Lai","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013569204","display_name":"Wenshuo Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenshuo Chen","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101451926","display_name":"Jing Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Tang","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057282504","display_name":"Benyou Wang","orcid":"https://orcid.org/0000-0002-1501-9914"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Benyou Wang","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5053229311"],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31768306,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"12464","last_page":"12472"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11148","display_name":"Language, Metaphor, and Cognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11148","display_name":"Language, Metaphor, and Cognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metaphor","display_name":"Metaphor","score":0.7105000019073486},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5889000296592712},{"id":"https://openalex.org/keywords/comprehension","display_name":"Comprehension","score":0.5440999865531921},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5133000016212463},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4453999996185303},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.40290001034736633}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7164000272750854},{"id":"https://openalex.org/C2778311575","wikidata":"https://www.wikidata.org/wiki/Q18534","display_name":"Metaphor","level":2,"score":0.7105000019073486},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5889000296592712},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.5440999865531921},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5310999751091003},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5133000016212463},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48080000281333923},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4453999996185303},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.40290001034736633},{"id":"https://openalex.org/C2777617010","wikidata":"https://www.wikidata.org/wiki/Q18957","display_name":"Mainstream","level":2,"score":0.30889999866485596},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.2720000147819519}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3758173","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3758173","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2895062073","https://openalex.org/W3045533700","https://openalex.org/W3173528751","https://openalex.org/W4285250921","https://openalex.org/W4385822632","https://openalex.org/W4391515560","https://openalex.org/W4392487628","https://openalex.org/W4392902623","https://openalex.org/W4402684170"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,12,79,148,164,181],"Audio":[3],"Language":[4],"Models":[5],"(ALMs)":[6],"have":[7],"led":[8],"to":[9,30,48,73],"significant":[10],"improvements":[11],"speech-related":[13],"tasks.":[14],"However,":[15],"their":[16],"capacity":[17],"for":[18,160,189],"profound":[19],"metaphorical":[20,162],"reasoning,":[21],"especially":[22],"when":[23],"derived":[24],"from":[25],"audio-specific":[26],"cues,":[27],"has":[28],"yet":[29],"be":[31],"thoroughly":[32],"investigated.":[33],"To":[34],"address":[35],"this":[36],"gap,":[37],"we":[38,138],"introduce":[39],"Unspoken,":[40],"a":[41,83,115,157,169],"bilingual":[42],"(Chinese-English)":[43],"question":[44],"answering":[45],"benchmark":[46],"designed":[47],"assess":[49],"ALMs'":[50],"comprehension":[51],"of":[52,86,107,131],"non-literal,":[53],"metaphor-rich":[54],"audio.":[55],"Unlike":[56],"prior":[57],"text-centric":[58],"evaluations,":[59],"Unspoken":[60,153],"emphasizes":[61],"prosody,":[62],"phonetic":[63],"ambiguity,":[64],"emotional":[65],"inflection,":[66],"and":[67,90,100,102,197],"other":[68],"nuanced":[69],"acoustic":[70],"features":[71],"critical":[72],"metaphor":[74,179],"understanding":[75,180],"but":[76,166],"often":[77],"lost":[78],"transcription.":[80],"We":[81],"construct":[82],"high-quality":[84],"dataset":[85],"2,764":[87],"manually":[88],"curated":[89],"validated":[91],"QA":[92],"pairs,":[93],"spanning":[94],"three":[95],"reasoning":[96,151,163],"dimensions:":[97],"semantic,":[98],"acoustic,":[99],"contextual,":[101],"covering":[103],"six":[104],"common":[105],"types":[106],"metaphors.":[108],"Evaluation":[109],"across":[110],"23":[111],"mainstream":[112],"ALMs":[113],"reveals":[114],"substantial":[116],"performance":[117],"gap:":[118],"the":[119,128,135],"best":[120],"model":[121],"achieves":[122],"only":[123,155],"69.5%":[124],"accuracy,":[125],"significantly":[126],"below":[127],"human":[129,183],"average":[130],"81.1%.":[132],"By":[133],"analyzing":[134],"error":[136],"patterns,":[137],"identify":[139],"five":[140],"key":[141],"failure":[142],"modes":[143],"that":[144,173],"reveal":[145],"fundamental":[146],"limitations":[147],"current":[149],"models'":[150],"capabilities.":[152],"not":[154],"sets":[156],"new":[158],"standard":[159],"evaluating":[161],"audio":[165],"also":[167],"pioneers":[168],"novel":[170],"research":[171],"direction":[172],"moves":[174],"beyond":[175],"transcription-based":[176],"assessments.":[177],"Grounding":[178],"authentic":[182],"communication":[184],"scenarios":[185],"offers":[186],"deep":[187],"insight":[188],"developing":[190],"more":[191],"cognitively":[192],"capable":[193],"ALMs.":[194],"The":[195],"data":[196],"codes":[198],"are":[199],"available":[200],"at":[201],"https://github.com/Hongru0306/UNSPOKEN.":[202]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
