{"id":"https://openalex.org/W4224903891","doi":"https://doi.org/10.1109/icassp43922.2022.9747056","title":"Gated Multimodal Fusion with Contrastive Learning for Turn-Taking Prediction in Human-Robot Dialogue","display_name":"Gated Multimodal Fusion with Contrastive Learning for Turn-Taking Prediction in Human-Robot Dialogue","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4224903891","doi":"https://doi.org/10.1109/icassp43922.2022.9747056"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747056","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747056","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101065791","display_name":"Jiudong Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiudong Yang","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039641921","display_name":"Peiying Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peiying Wang","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100434324","display_name":"Yi Zhu","orcid":"https://orcid.org/0000-0003-3000-3918"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]},{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Yi Zhu","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China","LTL, University of Cambridge"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"LTL, University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076923788","display_name":"Mingchao Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingchao Feng","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100357859","display_name":"Meng Chen","orcid":"https://orcid.org/0000-0002-6633-9205"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Chen","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101727205","display_name":"Xiaodong He","orcid":"https://orcid.org/0000-0002-9463-9168"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodong He","raw_affiliation_strings":["JD AI,Beijing,China","JD AI, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD AI,Beijing,China","institution_ids":["https://openalex.org/I4210103986"]},{"raw_affiliation_string":"JD AI, Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.142,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.79411609,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"7747","last_page":"7751"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8169177174568176},{"id":"https://openalex.org/keywords/turn-taking","display_name":"Turn-taking","score":0.6887577772140503},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6230974197387695},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5432372093200684},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5401711463928223},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5120866298675537},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5119019746780396},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.45076003670692444},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.44553589820861816},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.42071977257728577},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.410772442817688},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4098547697067261},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.3980686664581299},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36977991461753845}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8169177174568176},{"id":"https://openalex.org/C2776352735","wikidata":"https://www.wikidata.org/wiki/Q2313343","display_name":"Turn-taking","level":3,"score":0.6887577772140503},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6230974197387695},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5432372093200684},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5401711463928223},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5120866298675537},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5119019746780396},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.45076003670692444},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.44553589820861816},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.42071977257728577},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.410772442817688},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4098547697067261},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.3980686664581299},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36977991461753845},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747056","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747056","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7099999785423279}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W316215934","https://openalex.org/W1964725106","https://openalex.org/W1998677696","https://openalex.org/W2008741806","https://openalex.org/W2085662862","https://openalex.org/W2095705004","https://openalex.org/W2194775991","https://openalex.org/W2250891614","https://openalex.org/W2402762749","https://openalex.org/W2748406667","https://openalex.org/W2786387151","https://openalex.org/W2889445100","https://openalex.org/W2910984410","https://openalex.org/W2963093689","https://openalex.org/W2963150162","https://openalex.org/W2963747517","https://openalex.org/W2972438655","https://openalex.org/W2973109987","https://openalex.org/W2973135621","https://openalex.org/W2973137193","https://openalex.org/W2973172454","https://openalex.org/W3005680577","https://openalex.org/W3094393093","https://openalex.org/W3102393842","https://openalex.org/W3112188842","https://openalex.org/W3156636935","https://openalex.org/W4385245566","https://openalex.org/W6649804639","https://openalex.org/W6674330103","https://openalex.org/W6713254909","https://openalex.org/W6739901393","https://openalex.org/W6758270796","https://openalex.org/W6774314701","https://openalex.org/W6787253756","https://openalex.org/W6794687422"],"related_works":["https://openalex.org/W159740367","https://openalex.org/W2228992124","https://openalex.org/W2613954729","https://openalex.org/W3130118443","https://openalex.org/W1955947659","https://openalex.org/W2759378137","https://openalex.org/W2376974064","https://openalex.org/W3188579947","https://openalex.org/W4289529495","https://openalex.org/W4386695291"],"abstract_inverted_index":{"Turn-taking,":[0],"aiming":[1],"to":[2,34,46,83,119,129,144,154],"decide":[3],"when":[4],"the":[5,35,57,75,84,131,164,167],"next":[6],"speaker":[7],"can":[8,27],"start":[9],"talking,":[10],"is":[11,117],"an":[12],"essential":[13],"component":[14],"in":[15,61,105],"building":[16],"human-robot":[17,103],"spoken":[18],"dialogue":[19],"systems.":[20],"Previous":[21],"studies":[22],"indicate":[23],"that":[24],"multi-modal":[25],"cues":[26],"facilitate":[28],"this":[29,88],"challenging":[30],"task.":[31],"However,":[32],"due":[33],"paucity":[36],"of":[37,77,171],"public":[38],"multimodal":[39,53,114],"datasets,":[40],"current":[41],"methods":[42],"are":[43,161],"mostly":[44,72],"limited":[45],"either":[47],"utilizing":[48],"unimodal":[49],"features":[50],"or":[51],"simplistic":[52],"ensemble":[54],"models.":[55],"Besides,":[56],"inherent":[58],"class":[59],"imbalance":[60,133],"real":[62,102],"scenario,":[63],"e.g.":[64],"sentence":[65],"ending":[66],"with":[67,99],"short":[68],"pause":[69],"will":[70],"be":[71],"regarded":[73],"as":[74],"end":[76],"turn,":[78],"also":[79],"poses":[80],"great":[81],"challenge":[82],"turn-taking":[85,98,125],"decision.":[86],"In":[87],"paper,":[89],"we":[90,135],"first":[91],"collect":[92],"a":[93,111,137],"large-scale":[94],"annotated":[95],"corpus":[96],"for":[97,124],"over":[100,174],"5,000":[101],"dialogues":[104],"speech":[106],"and":[107,150,163,169],"text":[108],"modalities.":[109],"Then,":[110],"novel":[112],"gated":[113],"fusion":[115],"mechanism":[116],"devised":[118],"utilize":[120],"various":[121],"information":[122],"seamlessly":[123],"prediction.":[126],"More":[127],"importantly,":[128],"tackle":[130],"data":[132,141],"issue,":[134],"design":[136],"simple":[138],"yet":[139],"effective":[140],"augmentation":[142],"method":[143],"construct":[145],"negative":[146],"instances":[147],"without":[148],"supervision":[149],"apply":[151],"contrastive":[152],"learning":[153],"obtain":[155],"better":[156],"feature":[157],"representations.":[158],"Extensive":[159],"experiments":[160],"conducted":[162],"results":[165],"demonstrate":[166],"superiority":[168],"competitiveness":[170],"our":[172],"model":[173],"several":[175],"state-of-the-art":[176],"baselines.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":6}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
