{"id":"https://openalex.org/W3136316642","doi":"https://doi.org/10.1109/taslp.2021.3065852","title":"End-to-End Recurrent Cross-Modality Attention for Video Dialogue","display_name":"End-to-End Recurrent Cross-Modality Attention for Video Dialogue","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3136316642","doi":"https://doi.org/10.1109/taslp.2021.3065852","mag":"3136316642"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2021.3065852","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3065852","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000769601","display_name":"Yun-Wei Chu","orcid":"https://orcid.org/0000-0003-4443-070X"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yun-Wei Chu","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Purdue University, West Lafayette, Indiana, USA"],"raw_orcid":"https://orcid.org/0000-0003-4443-070X","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Purdue University, West Lafayette, Indiana, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049989751","display_name":"Kuan-Yen Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kuan-Yen Lin","raw_affiliation_strings":["Cornell Tech, New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cornell Tech, New York, NY, USA","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025002883","display_name":"Chao-Chun Hsu","orcid":"https://orcid.org/0000-0003-4117-2119"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chao-Chun Hsu","raw_affiliation_strings":["Department of Computer Science, University of Chicago, Chicago, IL, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029671916","display_name":"Lun\u2010Wei Ku","orcid":"https://orcid.org/0000-0003-2691-5404"},"institutions":[{"id":"https://openalex.org/I4210098366","display_name":"Institute of Information Science, Academia Sinica","ror":"https://ror.org/00z83z196","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210098366","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Lun-Wei Ku","raw_affiliation_strings":["IIS, Academia Sinica, Taipei, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIS, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210098366"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5000769601"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.02588505,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"29","issue":null,"first_page":"2456","last_page":"2464"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.813015341758728},{"id":"https://openalex.org/keywords/dialog-box","display_name":"Dialog box","score":0.753821849822998},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6854617595672607},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6361846923828125},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6321756839752197},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5910385251045227},{"id":"https://openalex.org/keywords/converse","display_name":"Converse","score":0.5645120739936829},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5192225575447083},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4933294355869293},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.440758615732193},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.41431325674057007},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3383777141571045},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3213564157485962},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.11919677257537842},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11452218890190125}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.813015341758728},{"id":"https://openalex.org/C173853756","wikidata":"https://www.wikidata.org/wiki/Q86915","display_name":"Dialog box","level":2,"score":0.753821849822998},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6854617595672607},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6361846923828125},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6321756839752197},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5910385251045227},{"id":"https://openalex.org/C2776809875","wikidata":"https://www.wikidata.org/wiki/Q1375963","display_name":"Converse","level":2,"score":0.5645120739936829},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5192225575447083},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4933294355869293},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.440758615732193},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.41431325674057007},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3383777141571045},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3213564157485962},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.11919677257537842},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11452218890190125},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2021.3065852","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3065852","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2592817252","display_name":null,"funder_award_id":"109-2221-E-001-015-","funder_id":"https://openalex.org/F4320322795","funder_display_name":"Ministry of Science and Technology, Taiwan"},{"id":"https://openalex.org/G8548315871","display_name":null,"funder_award_id":"108-2221-E-001-012-MY3","funder_id":"https://openalex.org/F4320322795","funder_display_name":"Ministry of Science and Technology, Taiwan"}],"funders":[{"id":"https://openalex.org/F4320322795","display_name":"Ministry of Science and Technology, Taiwan","ror":"https://ror.org/02kv4zf79"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":137,"referenced_works":["https://openalex.org/W1488163396","https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1586939924","https://openalex.org/W1591706642","https://openalex.org/W1601567445","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1923211482","https://openalex.org/W1933349210","https://openalex.org/W1947481528","https://openalex.org/W1956340063","https://openalex.org/W2095705004","https://openalex.org/W2110933980","https://openalex.org/W2122180654","https://openalex.org/W2123301721","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2154652894","https://openalex.org/W2159243025","https://openalex.org/W2250539671","https://openalex.org/W2293453011","https://openalex.org/W2302086703","https://openalex.org/W2337252826","https://openalex.org/W2396147015","https://openalex.org/W2463565445","https://openalex.org/W2507365558","https://openalex.org/W2523993696","https://openalex.org/W2527349934","https://openalex.org/W2560346187","https://openalex.org/W2560645892","https://openalex.org/W2575842049","https://openalex.org/W2606982687","https://openalex.org/W2607119937","https://openalex.org/W2622980782","https://openalex.org/W2737435850","https://openalex.org/W2737766105","https://openalex.org/W2741903908","https://openalex.org/W2750998636","https://openalex.org/W2768661419","https://openalex.org/W2798786641","https://openalex.org/W2805707570","https://openalex.org/W2808181286","https://openalex.org/W2810643877","https://openalex.org/W2890585349","https://openalex.org/W2891394954","https://openalex.org/W2892245540","https://openalex.org/W2896902935","https://openalex.org/W2900626451","https://openalex.org/W2903594499","https://openalex.org/W2949828251","https://openalex.org/W2951183276","https://openalex.org/W2951619830","https://openalex.org/W2962749469","https://openalex.org/W2962762462","https://openalex.org/W2962854379","https://openalex.org/W2962861647","https://openalex.org/W2962910007","https://openalex.org/W2963082899","https://openalex.org/W2963150162","https://openalex.org/W2963187678","https://openalex.org/W2963287297","https://openalex.org/W2963521239","https://openalex.org/W2963524571","https://openalex.org/W2963541336","https://openalex.org/W2963560969","https://openalex.org/W2963594498","https://openalex.org/W2963623904","https://openalex.org/W2963643760","https://openalex.org/W2963656855","https://openalex.org/W2963686907","https://openalex.org/W2963758027","https://openalex.org/W2963781647","https://openalex.org/W2963791035","https://openalex.org/W2963843052","https://openalex.org/W2963890755","https://openalex.org/W2963954913","https://openalex.org/W2964004697","https://openalex.org/W2964065937","https://openalex.org/W2964067226","https://openalex.org/W2964121744","https://openalex.org/W2964218959","https://openalex.org/W2964308564","https://openalex.org/W2967927722","https://openalex.org/W2969576497","https://openalex.org/W2970355596","https://openalex.org/W2980339970","https://openalex.org/W2981902456","https://openalex.org/W3014487864","https://openalex.org/W3099768435","https://openalex.org/W3099884890","https://openalex.org/W4298392976","https://openalex.org/W6629203210","https://openalex.org/W6630875275","https://openalex.org/W6631190155","https://openalex.org/W6635590879","https://openalex.org/W6639657675","https://openalex.org/W6639809013","https://openalex.org/W6641064462","https://openalex.org/W6677969093","https://openalex.org/W6678262379","https://openalex.org/W6679434410","https://openalex.org/W6679436768","https://openalex.org/W6680053761","https://openalex.org/W6682086655","https://openalex.org/W6682631176","https://openalex.org/W6683512859","https://openalex.org/W6685337303","https://openalex.org/W6687455147","https://openalex.org/W6691503852","https://openalex.org/W6692004142","https://openalex.org/W6693843455","https://openalex.org/W6697449767","https://openalex.org/W6711963698","https://openalex.org/W6719057275","https://openalex.org/W6730380741","https://openalex.org/W6730549503","https://openalex.org/W6731895421","https://openalex.org/W6739177970","https://openalex.org/W6740863234","https://openalex.org/W6743696045","https://openalex.org/W6743792510","https://openalex.org/W6744104549","https://openalex.org/W6745764446","https://openalex.org/W6746003159","https://openalex.org/W6746139478","https://openalex.org/W6746311623","https://openalex.org/W6748686444","https://openalex.org/W6752083267","https://openalex.org/W6752922659","https://openalex.org/W6756477321","https://openalex.org/W6756864573","https://openalex.org/W6757835490","https://openalex.org/W6766272978","https://openalex.org/W6766893790","https://openalex.org/W6767083792"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W203959209","https://openalex.org/W2110287964","https://openalex.org/W2167701463","https://openalex.org/W4307407935"],"abstract_inverted_index":{"Visual":[0],"dialogue":[1,20,28,46,74],"systems":[2,21,29],"need":[3],"to":[4,14,40,50,68,85,123],"understand":[5],"dynamic":[6],"visual":[7,96,111],"scenes":[8],"and":[9,62,97,113,144],"comprehend":[10],"semantics":[11],"in":[12,116],"order":[13,67],"converse":[15],"with":[16],"users.":[17],"Constructing":[18],"video":[19,73,93,140],"is":[22,106],"more":[23],"challenging":[24,138],"than":[25],"traditional":[26],"image":[27],"because":[30],"the":[31,45,60,63,70,104,120,137,145,154],"large":[32],"feature":[33],"space":[34],"of":[35,59,72,89,103,119,128],"videos":[36,61],"makes":[37],"it":[38],"difficult":[39],"capture":[41],"semantic":[42],"information.":[43,131],"Furthermore,":[44],"system":[47],"also":[48],"needs":[49],"precisely":[51],"answer":[52,86,101],"users'":[53],"question":[54,105],"based":[55,108],"on":[56,109,136,156],"comprehensive":[57],"understanding":[58,127],"previous":[64],"dialogue.":[65],"In":[66],"improve":[69],"performance":[71],"system,":[75],"we":[76],"proposed":[77,146],"an":[78],"end-to-end":[79],"recurrent":[80],"cross-modality":[81],"attention":[82],"(ReCMA)":[83],"model":[84],"a":[87,92,125,149],"series":[88],"questions":[90],"about":[91],"from":[94],"both":[95,110,129],"textual":[98,114],"modality.":[99],"The":[100],"representation":[102,112,115],"updated":[107],"each":[117],"step":[118],"reasoning":[121],"process":[122],"have":[124],"better":[126],"modalities'":[130],"We":[132],"evaluate":[133],"our":[134],"method":[135],"DSTC7":[139],"scene-aware":[141],"dialog":[142],"dataset":[143],"ReCMA":[147],"achieves":[148],"relative":[150],"20.8%":[151],"improvement":[152],"over":[153],"baseline":[155],"CIDEr.":[157]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
