{"id":"https://openalex.org/W4415433149","doi":"https://doi.org/10.21437/interspeech.2025-1632","title":"A Multimodal Chinese Dataset for Cross-lingual Sarcasm Detection","display_name":"A Multimodal Chinese Dataset for Cross-lingual Sarcasm Detection","publication_year":2025,"publication_date":"2025-08-17","ids":{"openalex":"https://openalex.org/W4415433149","doi":"https://doi.org/10.21437/interspeech.2025-1632"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2025-1632","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2025-1632","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research.rug.nl/en/publications/d5727698-a560-45fb-925a-0c7b9abe9d43","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101560145","display_name":"Xiyuan Gao","orcid":"https://orcid.org/0000-0003-0870-6721"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xiyuan Gao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031480279","display_name":"Bruce Xiao Wang","orcid":"https://orcid.org/0000-0003-3564-2911"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bruce Xiao Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100423583","display_name":"Meiling Zhang","orcid":"https://orcid.org/0000-0003-3540-9001"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meiling Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Shuming Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shuming Huang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100748869","display_name":"Li Zhu","orcid":"https://orcid.org/0000-0001-5249-6536"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001110193","display_name":"Shekhar Nayak","orcid":"https://orcid.org/0000-0002-4277-4851"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shekhar Nayak","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5090819693","display_name":"Matt Coler","orcid":"https://orcid.org/0000-0002-7631-5063"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matt Coler","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101560145"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40394959,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3968","last_page":"3972"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.635699987411499,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.635699987411499,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13959","display_name":"Swearing, Euphemism, Multilingualism","score":0.5439000129699707,"subfield":{"id":"https://openalex.org/subfields/3315","display_name":"Communication"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sarcasm","display_name":"Sarcasm","score":0.9865000247955322},{"id":"https://openalex.org/keywords/certainty","display_name":"Certainty","score":0.5393999814987183},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.476500004529953},{"id":"https://openalex.org/keywords/pragmatics","display_name":"Pragmatics","score":0.3716999888420105},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.3693999946117401},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.2867000102996826}],"concepts":[{"id":"https://openalex.org/C2776207355","wikidata":"https://www.wikidata.org/wiki/Q191035","display_name":"Sarcasm","level":3,"score":0.9865000247955322},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6570000052452087},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6399000287055969},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.616599977016449},{"id":"https://openalex.org/C7493553","wikidata":"https://www.wikidata.org/wiki/Q1520777","display_name":"Certainty","level":2,"score":0.5393999814987183},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.476500004529953},{"id":"https://openalex.org/C11693617","wikidata":"https://www.wikidata.org/wiki/Q181839","display_name":"Pragmatics","level":2,"score":0.3716999888420105},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35679998993873596},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2992999851703644},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.29409998655319214},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2919999957084656},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C2777375102","wikidata":"https://www.wikidata.org/wiki/Q208351","display_name":"Disgust","level":3,"score":0.2770000100135803},{"id":"https://openalex.org/C202889954","wikidata":"https://www.wikidata.org/wiki/Q1139554","display_name":"Subjectivity","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C2780277889","wikidata":"https://www.wikidata.org/wiki/Q282301","display_name":"Demonstrative","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2025-1632","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2025-1632","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2025","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.rug.nl:publications/d5727698-a560-45fb-925a-0c7b9abe9d43","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/d5727698-a560-45fb-925a-0c7b9abe9d43","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Gao, X, Wang, B X, Zhang, M, Huang, S, Li, Z, Nayak, S & Coler, M 2025, A Multimodal Chinese Dataset for Cross-lingual Sarcasm Detection. in Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH. Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH, ISCA, pp. 3968-3972, 26th Interspeech Conference 2025, Rotterdam, Netherlands, 17/08/2025. https://doi.org/10.21437/Interspeech.2025-1632","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.rug.nl:openaire_cris_publications/d5727698-a560-45fb-925a-0c7b9abe9d43","is_oa":true,"landing_page_url":"https://hdl.handle.net/11370/d5727698-a560-45fb-925a-0c7b9abe9d43","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Gao, X, Wang, B X, Zhang, M, Huang, S, Li, Z, Nayak, S & Coler, M 2025, A Multimodal Chinese Dataset for Cross-lingual Sarcasm Detection. in Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH. Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH, ISCA, pp. 3968-3972, 26th Interspeech Conference 2025, Rotterdam, Netherlands, 17/08/2025. https://doi.org/10.21437/Interspeech.2025-1632","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.rug.nl:publications/d5727698-a560-45fb-925a-0c7b9abe9d43","is_oa":true,"landing_page_url":"https://research.rug.nl/en/publications/d5727698-a560-45fb-925a-0c7b9abe9d43","pdf_url":null,"source":{"id":"https://openalex.org/S4306400420","display_name":"University of Groningen research database (University of Groningen / Centre for Information Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I169381384","host_organization_name":"University of Groningen","host_organization_lineage":["https://openalex.org/I169381384"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Gao, X, Wang, B X, Zhang, M, Huang, S, Li, Z, Nayak, S & Coler, M 2025, A Multimodal Chinese Dataset for Cross-lingual Sarcasm Detection. in Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH. Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH, ISCA, pp. 3968-3972, 26th Interspeech Conference 2025, Rotterdam, Netherlands, 17/08/2025. https://doi.org/10.21437/Interspeech.2025-1632","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sarcasm":[0,70],"is":[1],"expressed":[2],"through":[3],"subtle":[4],"cues":[5],"like":[6],"pitch,":[7],"speech":[8],"rate,":[9],"and":[10,51,62,100],"facial":[11],"expressions,":[12],"with":[13],"patterns":[14],"varying":[15],"across":[16],"languages,":[17],"e.g.,":[18],"English":[19,50],"speakers":[20,26],"lower":[21],"the":[22,52,67,90,118],"pitch":[23],"while":[24],"Cantonese":[25],"raise":[27],"it.":[28],"While":[29],"humans":[30],"readily":[31],"interpret":[32],"these":[33],"signals,":[34],"computational":[35],"models":[36],"struggle,":[37],"creating":[38],"challenges":[39],"for":[40,57,120],"Human-Machine":[41],"Interaction.":[42],"Most":[43],"multimodal":[44],"sarcasm":[45,114,123],"recognition":[46],"research":[47],"focuses":[48],"on":[49],"lack":[53],"of":[54,76,92,97,104],"high-quality":[55],"datasets":[56],"other":[58],"languages":[59],"hinders":[60],"cross-lingual":[61,122],"cross-cultural":[63],"studies.":[64],"We":[65,78],"introduce":[66],"Multimodal":[68],"Chinese":[69],"Dataset":[71],"(MCSD),":[72],"containing":[73],"10.57":[74],"hours":[75],"video.":[77],"propose":[79],"a":[80,95,110],"standardized":[81],"annotation":[82],"framework":[83],"that":[84],"captures":[85],"annotator":[86],"certainty":[87],"to":[88,126],"reflect":[89],"subjectivity":[91],"sarcasm,":[93],"achieving":[94],"Fleiss'kappa":[96],"0.74":[98],"(unweighted)":[99],"0.79":[101],"(certainty-weighted).":[102],"Validation":[103],"our":[105],"dataset":[106],"using":[107],"SVM":[108],"achieves":[109],"76.64%":[111],"F1-score":[112],"in":[113],"detection.":[115],"MCSD":[116],"lays":[117],"foundation":[119],"robust":[121],"detection,":[124],"contributing":[125],"advanced,":[127],"human-centric":[128],"systems.":[129]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-24T00:00:00"}
