{"id":"https://openalex.org/W4372260395","doi":"https://doi.org/10.1109/icassp49357.2023.10096586","title":"Using Auxiliary Tasks In Multimodal Fusion of Wav2vec 2.0 And Bert for Multimodal Emotion Recognition","display_name":"Using Auxiliary Tasks In Multimodal Fusion of Wav2vec 2.0 And Bert for Multimodal Emotion Recognition","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372260395","doi":"https://doi.org/10.1109/icassp49357.2023.10096586"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096586","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096586","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015083720","display_name":"Dekai Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dekai Sun","raw_affiliation_strings":["Harbin Institute of Technology,Harbin,China","Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Harbin,China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103325403","display_name":"Yancheng He","orcid":"https://orcid.org/0009-0003-5078-0447"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yancheng He","raw_affiliation_strings":["Harbin Institute of Technology,Harbin,China","Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Harbin,China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101508830","display_name":"Jiqing Han","orcid":"https://orcid.org/0000-0002-4297-4300"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiqing Han","raw_affiliation_strings":["Harbin Institute of Technology,Harbin,China","Harbin Institute of Technology, Harbin, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology,Harbin,China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology, Harbin, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5015083720"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":5.4793,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.96227409,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":1.0,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7671493887901306},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.7357099056243896},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.7079227566719055},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6938806772232056},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.6004334092140198},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.5524472594261169},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5193139910697937},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.502086877822876},{"id":"https://openalex.org/keywords/fusion-mechanism","display_name":"Fusion mechanism","score":0.45641475915908813},{"id":"https://openalex.org/keywords/multimodal-therapy","display_name":"Multimodal therapy","score":0.4431779384613037},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3942141532897949},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34331274032592773},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.334075391292572},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.060271382331848145}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7671493887901306},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.7357099056243896},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.7079227566719055},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6938806772232056},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.6004334092140198},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.5524472594261169},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5193139910697937},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.502086877822876},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.45641475915908813},{"id":"https://openalex.org/C4441509","wikidata":"https://www.wikidata.org/wiki/Q6418787","display_name":"Multimodal therapy","level":2,"score":0.4431779384613037},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3942141532897949},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34331274032592773},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.334075391292572},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.060271382331848145},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C141071460","wikidata":"https://www.wikidata.org/wiki/Q40821","display_name":"Surgery","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C103038307","wikidata":"https://www.wikidata.org/wiki/Q6556360","display_name":"Lipid bilayer fusion","level":3,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096586","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096586","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.47999998927116394}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1995663108","https://openalex.org/W2123119128","https://openalex.org/W2146334809","https://openalex.org/W2164699598","https://openalex.org/W2896457183","https://openalex.org/W2908510526","https://openalex.org/W2997258743","https://openalex.org/W3036601975","https://openalex.org/W3096690837","https://openalex.org/W3096723250","https://openalex.org/W3128376221","https://openalex.org/W3128513378","https://openalex.org/W3160183718","https://openalex.org/W3197642003","https://openalex.org/W4224920209","https://openalex.org/W4225635674","https://openalex.org/W4297841872","https://openalex.org/W4297841880","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W3157841754","https://openalex.org/W4381827277","https://openalex.org/W4390136517","https://openalex.org/W3167558523","https://openalex.org/W3120825179","https://openalex.org/W3092201840","https://openalex.org/W2014028898","https://openalex.org/W4384202059","https://openalex.org/W2364749629","https://openalex.org/W4313042734"],"abstract_inverted_index":{"The":[0],"lack":[1,53],"of":[2,7,47,54,59],"data":[3],"and":[4,36,41,92,98,118],"the":[5,52,57,76,87,94,104,124],"difficulty":[6,58],"multimodal":[8,15,60],"fusion":[9,72,89],"have":[10],"always":[11],"been":[12],"challenges":[13],"for":[14,33,38],"emotion":[16],"recognition":[17],"(MER).":[18],"In":[19],"this":[20],"paper,":[21],"we":[22,62,80,108],"propose":[23],"to":[24,49,85,96,103],"use":[25,63],"pre-trained":[26],"models":[27],"as":[28,69],"upstream":[29],"network,":[30],"wav2vec":[31],"2.0":[32],"audio":[34],"modality":[35],"BERT":[37],"text":[39],"modality,":[40],"finetune":[42],"them":[43],"in":[44],"downstream":[45,71],"task":[46,78],"MER":[48,77],"cope":[50],"with":[51],"data.":[55],"For":[56],"fusion,":[61],"a":[64,70,110],"K-layer":[65],"multi-head":[66],"attention":[67],"mechanism":[68],"module.":[73],"Starting":[74],"from":[75],"itself,":[79],"design":[81],"two":[82],"auxiliary":[83],"tasks":[84],"alleviate":[86],"insufficient":[88],"between":[90],"modalities":[91],"guide":[93],"network":[95],"capture":[97],"align":[99],"emotion-related":[100],"features.":[101],"Compared":[102],"previous":[105],"state-of-the-art":[106],"models,":[107],"achieve":[109],"better":[111],"performance":[112],"by":[113],"78.42%":[114],"Weighted":[115],"Accuracy":[116,121],"(WA)":[117],"79.71%":[119],"Unweighted":[120],"(UA)":[122],"on":[123],"IEMOCAP":[125],"dataset.":[126]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
