{"id":"https://openalex.org/W4387421359","doi":"https://doi.org/10.1145/3577190.3614110","title":"Enhancing Resilience to Missing Data in Audio-Text Emotion Recognition with Multi-Scale Chunk Regularization","display_name":"Enhancing Resilience to Missing Data in Audio-Text Emotion Recognition with Multi-Scale Chunk Regularization","publication_year":2023,"publication_date":"2023-10-07","ids":{"openalex":"https://openalex.org/W4387421359","doi":"https://doi.org/10.1145/3577190.3614110"},"language":"en","primary_location":{"id":"doi:10.1145/3577190.3614110","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3577190.3614110","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3577190.3614110","source":{"id":"https://openalex.org/S4363608440","display_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3577190.3614110","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070819601","display_name":"Wei-Cheng Lin","orcid":"https://orcid.org/0000-0003-1933-1590"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wei-Cheng Lin","raw_affiliation_strings":["Electrical and Computer Engineering, The University of Texas at Dallas, United States"],"raw_orcid":"https://orcid.org/0000-0003-1933-1590","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, The University of Texas at Dallas, United States","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017966455","display_name":"Lucas Goncalves","orcid":"https://orcid.org/0000-0001-9613-1002"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lucas Goncalves","raw_affiliation_strings":["Electrical and Computer Engineering, The University of Texas at Dallas, United States"],"raw_orcid":"https://orcid.org/0000-0001-9613-1002","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, The University of Texas at Dallas, United States","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040793194","display_name":"Carlos Busso","orcid":"https://orcid.org/0000-0002-4075-4072"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carlos Busso","raw_affiliation_strings":["Electrical and Computer Engineering, The University of Texas at Dallas, United States"],"raw_orcid":"https://orcid.org/0000-0002-4075-4072","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, The University of Texas at Dallas, United States","institution_ids":["https://openalex.org/I162577319"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5070819601"],"corresponding_institution_ids":["https://openalex.org/I162577319"],"apc_list":null,"apc_paid":null,"fwci":0.2631,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.68125,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"207","last_page":"215"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8432561159133911},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5469326376914978},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5331043004989624},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5251383781433105},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49808573722839355},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.48535671830177307},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.43545016646385193},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4340859651565552},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.42429620027542114},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4115551710128784},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33980751037597656}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8432561159133911},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5469326376914978},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5331043004989624},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5251383781433105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49808573722839355},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.48535671830177307},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.43545016646385193},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4340859651565552},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42429620027542114},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4115551710128784},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33980751037597656},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3577190.3614110","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3577190.3614110","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3577190.3614110","source":{"id":"https://openalex.org/S4363608440","display_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3577190.3614110","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3577190.3614110","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3577190.3614110","source":{"id":"https://openalex.org/S4363608440","display_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5099999904632568,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G6189572392","display_name":"CCRI: Medium: MSP-Podcast: Creating The Largest Speech Emotional Database By Leveraging Existing Naturalistic Recordings","funder_award_id":"2016719","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8082180408","display_name":null,"funder_award_id":"CNS-2016719","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387421359.pdf","grobid_xml":"https://content.openalex.org/works/W4387421359.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W1970155934","https://openalex.org/W1981395202","https://openalex.org/W1985867508","https://openalex.org/W1992475611","https://openalex.org/W2068124150","https://openalex.org/W2109255472","https://openalex.org/W2117752179","https://openalex.org/W2143350951","https://openalex.org/W2164699598","https://openalex.org/W2401417847","https://openalex.org/W2525412388","https://openalex.org/W2592549418","https://openalex.org/W2619383789","https://openalex.org/W2619941915","https://openalex.org/W2730845691","https://openalex.org/W2742542661","https://openalex.org/W2747874407","https://openalex.org/W2787247660","https://openalex.org/W2887761937","https://openalex.org/W2897337310","https://openalex.org/W2962931510","https://openalex.org/W2964051877","https://openalex.org/W2965373594","https://openalex.org/W2972852081","https://openalex.org/W2980282514","https://openalex.org/W2997026866","https://openalex.org/W2997258743","https://openalex.org/W3014163061","https://openalex.org/W3088631780","https://openalex.org/W3095118468","https://openalex.org/W3114214226","https://openalex.org/W3164582967","https://openalex.org/W3184369217","https://openalex.org/W3198220993","https://openalex.org/W3207579445","https://openalex.org/W4287777846","https://openalex.org/W4309799497","https://openalex.org/W4312976151","https://openalex.org/W4361994820","https://openalex.org/W4376632472","https://openalex.org/W6922016914"],"related_works":["https://openalex.org/W2053269318","https://openalex.org/W2364370872","https://openalex.org/W2185469136","https://openalex.org/W2025614924","https://openalex.org/W2294335174","https://openalex.org/W2097963413","https://openalex.org/W3145575561","https://openalex.org/W2001275470","https://openalex.org/W2073996508","https://openalex.org/W4281476908"],"abstract_inverted_index":{"Most":[0],"existing":[1],"audio-text":[2,147],"emotion":[3,218],"recognition":[4,219],"studies":[5],"have":[6],"focused":[7],"on":[8,79,133],"the":[9,17,28,35,38,80,91,102,111,134,146,152,155,161,172,180,189,193],"computational":[10],"modeling":[11],"aspects,":[12],"including":[13],"strategies":[14,60,73],"for":[15,117,126,168],"fusing":[16],"modalities.":[18,173,186],"An":[19],"area":[20],"that":[21,138],"has":[22],"received":[23],"less":[24],"attention":[25,158],"is":[26,90,223],"understanding":[27],"role":[29],"of":[30,77,87,154],"proper":[31],"temporal":[32,127,141],"synchronization":[33,170],"between":[34,171],"modalities":[36],"in":[37,160],"model":[39,46,190],"performance.":[40],"This":[41],"study":[42,89],"presents":[43],"a":[44,49,114,215],"transformer-based":[45,162],"designed":[47],"with":[48,70,74,192],"word-chunk":[50],"concept,":[51],"which":[52,96],"offers":[53],"an":[54,122],"ideal":[55],"framework":[56],"to":[57,61,100,144,166,184],"explore":[58],"different":[59,75,115],"align":[62],"text":[63],"and":[64,210],"speech.":[65],"The":[66,157,221],"approach":[67,112,163],"creates":[68],"chunks":[69,103,148],"alternative":[71],"alignment":[72,94,116,142,201],"levels":[76],"dependency":[78],"underlying":[81],"lexical":[82,106,177],"boundaries.":[83,107],"A":[84],"key":[85],"contribution":[86],"this":[88],"multi-scale":[92,195],"chunk":[93,196],"strategy,":[95],"generates":[97,113],"random":[98,200],"alignments":[99],"create":[101,145],"without":[104],"considering":[105],"For":[108],"every":[109],"epoch,":[110],"each":[118],"sentence,":[119],"serving":[120],"as":[121],"effective":[123],"regularization":[124,197],"method":[125],"dependency.":[128],"Our":[129],"experimental":[130],"results":[131],"based":[132],"MSP-Podcast":[135],"corpus":[136],"indicate":[137],"providing":[139],"precise":[140],"information":[143],"does":[149],"not":[150],"improve":[151],"performance":[153],"system.":[156],"mechanisms":[159],"are":[164],"able":[165],"compensate":[167],"imperfect":[169],"However,":[174],"using":[175,199],"exact":[176],"boundaries":[178],"makes":[179],"system":[181],"highly":[182],"vulnerable":[183],"missing":[185,208],"In":[187],"contrast,":[188],"trained":[191],"proposed":[194],"strategy":[198],"can":[202],"significantly":[203],"increase":[204],"its":[205],"robustness":[206],"against":[207],"data":[209],"remain":[211],"effective,":[212],"even":[213],"under":[214],"single":[216],"audio-only":[217],"task.":[220],"code":[222],"available":[224],"at:":[225],"https://github.com/winston-lin-wei-cheng/MultiScale-Chunk-Regularization":[226]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
