{"id":"https://openalex.org/W4375869455","doi":"https://doi.org/10.1109/icassp49357.2023.10096641","title":"Adaptive Mask Co-Optimization for Modal Dependence in Multimodal Learning","display_name":"Adaptive Mask Co-Optimization for Modal Dependence in Multimodal Learning","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869455","doi":"https://doi.org/10.1109/icassp49357.2023.10096641"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096641","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096641","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056431851","display_name":"Ying Zhou","orcid":"https://orcid.org/0000-0001-7678-5703"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ying Zhou","raw_affiliation_strings":["Xidian University,School of Artificial Intelligence,China","School of Artificial Intelligence, Xidian University, China"],"affiliations":[{"raw_affiliation_string":"Xidian University,School of Artificial Intelligence,China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083213551","display_name":"Xuefeng Liang","orcid":"https://orcid.org/0000-0002-1448-0477"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuefeng Liang","raw_affiliation_strings":["Xidian University,School of Artificial Intelligence,China","Pazhou Lab, Huangpu, China","School of Artificial Intelligence, Xidian University, China"],"affiliations":[{"raw_affiliation_string":"Xidian University,School of Artificial Intelligence,China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"Pazhou Lab, Huangpu, China","institution_ids":[]},{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000700303","display_name":"Shiquan Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiquan Zheng","raw_affiliation_strings":["Xidian University,School of Artificial Intelligence,China","School of Artificial Intelligence, Xidian University, China"],"affiliations":[{"raw_affiliation_string":"Xidian University,School of Artificial Intelligence,China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112912500","display_name":"Huijun Xuan","orcid":null},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huijun Xuan","raw_affiliation_strings":["Xidian University,School of Artificial Intelligence,China","School of Artificial Intelligence, Xidian University, China"],"affiliations":[{"raw_affiliation_string":"Xidian University,School of Artificial Intelligence,China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046467174","display_name":"Takatsune Kumada","orcid":"https://orcid.org/0000-0002-0198-0501"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]},{"id":"https://openalex.org/I39012071","display_name":"Kyoto College of Graduate Studies for Informatics","ror":"https://ror.org/05mzj8a56","country_code":"JP","type":"education","lineage":["https://openalex.org/I39012071"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takatsune Kumada","raw_affiliation_strings":["Kyoto University,IST, Graduate School of Informatics,Japan","IST, Graduate School of Informatics, Kyoto University, Japan"],"affiliations":[{"raw_affiliation_string":"Kyoto University,IST, Graduate School of Informatics,Japan","institution_ids":["https://openalex.org/I39012071","https://openalex.org/I22299242"]},{"raw_affiliation_string":"IST, Graduate School of Informatics, Kyoto University, Japan","institution_ids":["https://openalex.org/I22299242"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5056431851"],"corresponding_institution_ids":["https://openalex.org/I149594827"],"apc_list":null,"apc_paid":null,"fwci":0.5224,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.66000978,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.9417219161987305},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7641562223434448},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.5597302913665771},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5596426129341125},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5567225217819214},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4442433714866638},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3392142951488495}],"concepts":[{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.9417219161987305},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7641562223434448},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.5597302913665771},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5596426129341125},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5567225217819214},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4442433714866638},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3392142951488495},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096641","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096641","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1832693441","https://openalex.org/W1983364832","https://openalex.org/W2085662862","https://openalex.org/W2146334809","https://openalex.org/W2964300796","https://openalex.org/W3007282427","https://openalex.org/W3035333188","https://openalex.org/W3104219743","https://openalex.org/W3168463823","https://openalex.org/W3173396651","https://openalex.org/W3176157254","https://openalex.org/W3196591432","https://openalex.org/W3205272809","https://openalex.org/W3212002421","https://openalex.org/W3213732694","https://openalex.org/W3214127792","https://openalex.org/W4220887861","https://openalex.org/W4221143151","https://openalex.org/W4221154966","https://openalex.org/W4287550992","https://openalex.org/W4312639100","https://openalex.org/W6784187948","https://openalex.org/W6784500203","https://openalex.org/W6796149595","https://openalex.org/W6797528369","https://openalex.org/W6803709490","https://openalex.org/W6803986650","https://openalex.org/W6810226654"],"related_works":["https://openalex.org/W4389505417","https://openalex.org/W4301143707","https://openalex.org/W2952745240","https://openalex.org/W2962931510","https://openalex.org/W127837312","https://openalex.org/W2922283411","https://openalex.org/W4384789578","https://openalex.org/W4380551887","https://openalex.org/W4285159263","https://openalex.org/W4387421317"],"abstract_inverted_index":{"Multimodal":[0],"learning":[1],"has":[2],"demonstrated":[3],"a":[4,51],"great":[5],"advantage":[6],"in":[7,117],"emotion":[8],"recognition":[9],"tasks":[10],"due":[11],"to":[12,24,32,42,73,83],"the":[13,37,71,90,103],"richer":[14],"information":[15],"from":[16],"different":[17],"modalities.":[18],"However,":[19],"multimodal":[20],"models":[21,93,112],"may":[22],"incline":[23],"rely":[25],"on":[26,94,102],"some":[27],"modalities":[28,39,76,81,96],"that":[29],"are":[30],"easier":[31],"be":[33,61,84],"learned,":[34],"while":[35],"under-fit":[36],"other":[38,75],"and":[40],"lead":[41],"sub-optimal":[43],"results.":[44],"To":[45],"address":[46],"this":[47],"problem,":[48],"we":[49],"propose":[50],"novel":[52],"plug-in":[53],"module,":[54],"Adaptive":[55],"Mask":[56],"Co-optimization":[57],"(AMCo),":[58],"which":[59],"could":[60],"inserted":[62],"into":[63],"advanced":[64],"models.":[65],"The":[66,86,99],"adaptive":[67],"mask":[68],"can":[69,88,108],"encourage":[70],"model":[72],"fit":[74],"better":[77],"by":[78,113],"making":[79],"dependent":[80,95],"harder":[82],"learned.":[85],"cooptimization":[87],"preserve":[89],"performance":[91],"of":[92,119],"without":[97],"degradation.":[98],"extensive":[100],"experiments":[101],"IEMOCAP":[104],"dataset":[105],"show":[106],"AMCo":[107],"improve":[109],"four":[110],"state-of-the-art":[111],"1.14%":[114],"~":[115],"3.03%":[116],"terms":[118],"accuracy.":[120]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
