{"id":"https://openalex.org/W4403780491","doi":"https://doi.org/10.1145/3664647.3681599","title":"DQ-Former: Querying Transformer with Dynamic Modality Priority for Cognitive-aligned Multimodal Emotion Recognition in Conversation","display_name":"DQ-Former: Querying Transformer with Dynamic Modality Priority for Cognitive-aligned Multimodal Emotion Recognition in Conversation","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403780491","doi":"https://doi.org/10.1145/3664647.3681599"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681599","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681599","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000087241","display_name":"\u91ce\u6751 \u6587\u656c","orcid":"https://orcid.org/0009-0007-7715-8630"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ye Jing","raw_affiliation_strings":["the State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Science &amp; School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"the State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Science &amp; School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102636467","display_name":"Xinpei Zhao","orcid":"https://orcid.org/0009-0000-6597-7582"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinpei Zhao","raw_affiliation_strings":["the State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Science &amp; School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"the State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Science &amp; School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5000087241"],"corresponding_institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":2.132,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.88135043,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4795","last_page":"4804"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.8489964008331299},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6808375716209412},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6729460954666138},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6710749268531799},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.4347476661205292},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3901996612548828},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3784346580505371},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.37493330240249634},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3255297541618347},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.14849194884300232},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.1332157850265503},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13091355562210083},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.09858298301696777},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.08718535304069519}],"concepts":[{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.8489964008331299},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6808375716209412},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6729460954666138},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6710749268531799},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.4347476661205292},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3901996612548828},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3784346580505371},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.37493330240249634},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3255297541618347},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.14849194884300232},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.1332157850265503},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13091355562210083},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.09858298301696777},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.08718535304069519},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681599","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681599","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2000162405","https://openalex.org/W2912083425","https://openalex.org/W2948947170","https://openalex.org/W2963686995","https://openalex.org/W2964010806","https://openalex.org/W2964051877","https://openalex.org/W2964300796","https://openalex.org/W3036857024","https://openalex.org/W3037572520","https://openalex.org/W3093051361","https://openalex.org/W3169801598","https://openalex.org/W3173396651","https://openalex.org/W3209984917","https://openalex.org/W3211488063","https://openalex.org/W4212774754","https://openalex.org/W4221147459","https://openalex.org/W4221154966","https://openalex.org/W4226380987","https://openalex.org/W4297510826","https://openalex.org/W4304091726","https://openalex.org/W4319862479","https://openalex.org/W4360930863","https://openalex.org/W4385570058","https://openalex.org/W4385570630","https://openalex.org/W4385571170","https://openalex.org/W4385571916","https://openalex.org/W4389520025","https://openalex.org/W4389523777","https://openalex.org/W4391020222"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W1968552888","https://openalex.org/W2374116601","https://openalex.org/W3093134843","https://openalex.org/W1511346092","https://openalex.org/W1527532029","https://openalex.org/W2378167147","https://openalex.org/W3210777354","https://openalex.org/W2281307425","https://openalex.org/W2461480269"],"abstract_inverted_index":{"Multimodal":[0],"Emotion":[1],"Recognition":[2],"in":[3,14],"Conversations":[4],"aims":[5],"to":[6,99],"understand":[7],"the":[8,50,66,157,165],"human":[9,53],"emotion":[10,73,106,176],"of":[11,20,52,58,69,95,105,112,141,144,159],"each":[12,62,130],"utterance":[13],"a":[15,81,92,169],"conversation":[16],"from":[17,122,146],"different":[18,70,110,123,142],"types":[19],"data,":[21],"such":[22],"as":[23,41],"speech":[24],"and":[25,64,101,138,153,171],"text.":[26],"Previous":[27],"works":[28],"mainly":[29],"focus":[30],"on":[31,151],"either":[32],"complex":[33],"unimodal":[34,114],"feature":[35],"extraction":[36],"or":[37],"sophisticated":[38],"fusion":[39,85,132,140],"techniques":[40],"general":[42],"multimodal":[43,84,173],"classification":[44],"tasks":[45],"do.":[46],"However,":[47],"they":[48],"ignore":[49],"process":[51,135],"perception,":[54],"neglecting":[55],"various":[56,103],"levels":[57,143],"emotional":[59,120],"features":[60,121],"within":[61],"modality":[63,127],"disregarding":[65],"unique":[67],"contributions":[68],"modalities":[71,124],"for":[72,175],"recognition.":[74,177],"To":[75],"address":[76],"these":[77,119],"issues,":[78],"we":[79],"propose":[80],"more":[82],"cognitive-aligned":[83],"framework,":[86],"namely":[87],"DQ-Former.":[88,160],"Specifically,":[89],"DQ-Former":[90],"utilizes":[91],"small":[93],"set":[94],"learnable":[96],"query":[97],"tokens":[98],"collate":[100],"condense":[102],"granularities":[104],"cues":[107],"embedded":[108],"at":[109,129],"layers":[111],"pre-trained":[113],"models.":[115],"Subsequently,":[116],"it":[117],"integrates":[118],"with":[125],"dynamic":[126],"priorities":[128],"intermediate":[131],"layer.":[133],"This":[134],"enables":[136],"explicit":[137],"effective":[139],"information":[145],"diverse":[147],"modalities.":[148],"Extensive":[149],"experiments":[150],"MELD":[152],"IEMOCAP":[154],"datasets":[155],"validate":[156],"effectiveness":[158],"Our":[161],"results":[162],"show":[163],"that":[164],"proposed":[166],"method":[167],"achieves":[168],"robust":[170],"interpretable":[172],"representation":[174]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
