{"id":"https://openalex.org/W4206748079","doi":"https://doi.org/10.1109/tmm.2022.3142448","title":"Deep Multimodal Sequence Fusion by Regularized Expressive Representation Distillation","display_name":"Deep Multimodal Sequence Fusion by Regularized Expressive Representation Distillation","publication_year":2022,"publication_date":"2022-01-13","ids":{"openalex":"https://openalex.org/W4206748079","doi":"https://doi.org/10.1109/tmm.2022.3142448"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2022.3142448","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3142448","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030227077","display_name":"Xiaobao Guo","orcid":"https://orcid.org/0000-0002-3427-8540"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Xiaobao Guo","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University Singapore, Singapore, Singapore","Rapid-Rich Object Search (ROSE) Laboratory, Interdisciplinary Graduate Programme, Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]},{"raw_affiliation_string":"Rapid-Rich Object Search (ROSE) Laboratory, Interdisciplinary Graduate Programme, Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047970310","display_name":"Adams Wai\u2010Kin Kong","orcid":"https://orcid.org/0000-0002-9728-9511"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Adams Wai-Kin Kong","raw_affiliation_strings":["Rapid-Rich Object Search (ROSE) Laboratory, Interdisciplinary Graduate Programme, Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Rapid-Rich Object Search (ROSE) Laboratory, Interdisciplinary Graduate Programme, Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080977911","display_name":"Alex C. Kot","orcid":"https://orcid.org/0000-0001-6262-8125"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Alex Kot","raw_affiliation_strings":["Rapid-Rich Object Search (ROSE) Laboratory, Interdisciplinary Graduate Programme, Nanyang Technological University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Rapid-Rich Object Search (ROSE) Laboratory, Interdisciplinary Graduate Programme, Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5030227077"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":2.9718,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.91479858,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"25","issue":null,"first_page":"2085","last_page":"2096"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8280773162841797},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.7242229580879211},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6878255605697632},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5874929428100586},{"id":"https://openalex.org/keywords/crossmodal","display_name":"Crossmodal","score":0.49938058853149414},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.48851144313812256},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33490824699401855},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33239251375198364},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.22213459014892578}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8280773162841797},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.7242229580879211},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6878255605697632},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5874929428100586},{"id":"https://openalex.org/C60115397","wikidata":"https://www.wikidata.org/wiki/Q5188732","display_name":"Crossmodal","level":4,"score":0.49938058853149414},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48851144313812256},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33490824699401855},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33239251375198364},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.22213459014892578},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2022.3142448","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3142448","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320766","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W1585160083","https://openalex.org/W1587447546","https://openalex.org/W1651753422","https://openalex.org/W1989085630","https://openalex.org/W2024051019","https://openalex.org/W2095176743","https://openalex.org/W2122563357","https://openalex.org/W2191779130","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2341528187","https://openalex.org/W2519656895","https://openalex.org/W2583643061","https://openalex.org/W2584487694","https://openalex.org/W2740550900","https://openalex.org/W2787581402","https://openalex.org/W2799041689","https://openalex.org/W2802895140","https://openalex.org/W2883409523","https://openalex.org/W2884585870","https://openalex.org/W2885005742","https://openalex.org/W2896766043","https://openalex.org/W2940259008","https://openalex.org/W2946165673","https://openalex.org/W2949391930","https://openalex.org/W2958722525","https://openalex.org/W2962897020","https://openalex.org/W2963252191","https://openalex.org/W2963710346","https://openalex.org/W2964010806","https://openalex.org/W2964051877","https://openalex.org/W2964216663","https://openalex.org/W2964260444","https://openalex.org/W2971050617","https://openalex.org/W2997573100","https://openalex.org/W3007282427","https://openalex.org/W3034727271","https://openalex.org/W3034849760","https://openalex.org/W3035333188","https://openalex.org/W3037572520","https://openalex.org/W3093051361","https://openalex.org/W3096609285","https://openalex.org/W3101998545","https://openalex.org/W3124054989","https://openalex.org/W3171516518","https://openalex.org/W3177318507","https://openalex.org/W6640212811","https://openalex.org/W6680230698","https://openalex.org/W6682962330","https://openalex.org/W6719667659","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6778883912","https://openalex.org/W6784333009"],"related_works":["https://openalex.org/W1574414179","https://openalex.org/W4362597605","https://openalex.org/W3009056573","https://openalex.org/W2922073769","https://openalex.org/W4297676672","https://openalex.org/W4281702477","https://openalex.org/W4378510483","https://openalex.org/W2989932438","https://openalex.org/W4387297750","https://openalex.org/W2186333919"],"abstract_inverted_index":{"Multimodal":[0],"sequence":[1,64,186],"learning":[2],"aims":[3,77],"to":[4,10,41,46,56,78,84,91,100,116,132,139],"utilize":[5],"information":[6,27],"from":[7],"different":[8],"modalities":[9],"enhance":[11,85],"overall":[12],"performance.":[13],"Mainstream":[14],"works":[15],"often":[16],"follow":[17],"an":[18],"intermediate-fusion":[19],"pipeline,":[20],"which":[21,157],"explores":[22],"both":[23,48],"modality-specific":[24],"and":[25,33,51,54,83,138],"modality-supplementary":[26],"for":[28,160,183],"fusion.":[29,89],"However,":[30],"the":[31,42,58,86,105,118,134,141,154,172],"unaligned":[32],"heterogeneously":[34],"distributed":[35],"multimodal":[36,63,81,126,155,185],"sequences":[37],"pose":[38],"significant":[39],"challenges":[40],"fusion":[43,145,162],"task:":[44],"1)":[45],"extract":[47],"effective":[49,80],"unimodal":[50,93,96,106],"crossmodal":[52,123],"representations":[53,82,97,107,148],"2)":[55],"overcome":[57],"overfitting":[59,119],"issue":[60,120],"in":[61,121,190],"joint":[62,122],"optimization.":[65],"In":[66],"this":[67],"work,":[68],"we":[69],"propose":[70],"regularized":[71],"expressive":[72,135],"representation":[73,94,136],"distillation":[74,102,112],"(RERD)":[75],"that":[76,171],"seek":[79],"generalization":[87],"of":[88,153],"First,":[90],"improve":[92],"learning,":[95],"are":[98,108,158],"assigned":[99],"multi-head":[101],"encoders,":[103],"where":[104],"iteratively":[109],"updated":[110],"through":[111],"attention":[113],"layers.":[114],"Second,":[115],"alleviate":[117],"optimization,":[124],"a":[125,150],"sinkhorn":[127],"distance":[128],"regularizer":[129],"is":[130],"proposed":[131,173],"reinforce":[133],"extraction":[137],"reduce":[140],"modality":[142],"gap":[143],"before":[144],"adaptively.":[146],"These":[147],"produce":[149],"comprehensive":[151],"view":[152],"sequences,":[156],"utilized":[159],"downstream":[161],"tasks.":[163],"Experimental":[164],"results":[165],"on":[166],"several":[167],"popular":[168],"benchmarks":[169],"demonstrate":[170],"method":[174],"achieves":[175],"state-of-the-art":[176],"performance,":[177],"compared":[178],"with":[179],"widely":[180],"used":[181],"baselines":[182],"deep":[184],"fusion,":[187],"as":[188],"shown":[189],"<uri":[191],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[192],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/Redaimao/RERD</uri>":[193],".":[194]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
