{"id":"https://openalex.org/W4392902922","doi":"https://doi.org/10.1109/icassp48485.2024.10447196","title":"Turn-Taking and Backchannel Prediction with Acoustic and Large Language Model Fusion","display_name":"Turn-Taking and Backchannel Prediction with Acoustic and Large Language Model Fusion","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392902922","doi":"https://doi.org/10.1109/icassp48485.2024.10447196"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447196","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447196","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101979902","display_name":"Jinhan Wang","orcid":"https://orcid.org/0000-0003-1930-2271"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jinhan Wang","raw_affiliation_strings":["University of California,Los Angeles,USA","University of California, Los Angeles, USA"],"affiliations":[{"raw_affiliation_string":"University of California,Los Angeles,USA","institution_ids":["https://openalex.org/I161318765"]},{"raw_affiliation_string":"University of California, Los Angeles, USA","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100336360","display_name":"Long Chen","orcid":"https://orcid.org/0000-0001-6148-9709"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Long Chen","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013650472","display_name":"Aparna Khare","orcid":"https://orcid.org/0000-0001-7151-3055"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aparna Khare","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109046112","display_name":"Anirudh Raju","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anirudh Raju","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076504502","display_name":"Pranav Dheram","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pranav Dheram","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017672130","display_name":"Di He","orcid":"https://orcid.org/0009-0008-5025-7062"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Di He","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060740546","display_name":"Minhua Wu","orcid":"https://orcid.org/0000-0001-9798-2984"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Minhua Wu","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060979948","display_name":"Andreas Stolcke","orcid":"https://orcid.org/0000-0002-9925-905X"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andreas Stolcke","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060593241","display_name":"Venkatesh Ravichandran","orcid":"https://orcid.org/0009-0001-7214-2919"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Venkatesh Ravichandran","raw_affiliation_strings":["Amazon Alexa AI,USA","Amazon Alexa AI, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Alexa AI,USA","institution_ids":["https://openalex.org/I1311688040"]},{"raw_affiliation_string":"Amazon Alexa AI, USA","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5101979902"],"corresponding_institution_ids":["https://openalex.org/I161318765"],"apc_list":null,"apc_paid":null,"fwci":2.0672,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.87998278,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"12121","last_page":"12125"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.8085861206054688},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7680066823959351},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6426230669021606},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.6220818161964417},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.579508900642395},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5323206782341003},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.5155322551727295},{"id":"https://openalex.org/keywords/turn-taking","display_name":"Turn-taking","score":0.47047096490859985},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4610075354576111},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45729315280914307},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.45523786544799805},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.4273136258125305},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39260971546173096},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.1146324872970581},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.09141632914543152},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.0821690559387207}],"concepts":[{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.8085861206054688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7680066823959351},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6426230669021606},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.6220818161964417},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.579508900642395},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5323206782341003},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.5155322551727295},{"id":"https://openalex.org/C2776352735","wikidata":"https://www.wikidata.org/wiki/Q2313343","display_name":"Turn-taking","level":3,"score":0.47047096490859985},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4610075354576111},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45729315280914307},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.45523786544799805},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.4273136258125305},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39260971546173096},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.1146324872970581},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09141632914543152},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0821690559387207},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447196","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447196","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W50922381","https://openalex.org/W57349375","https://openalex.org/W1558276682","https://openalex.org/W2102759648","https://openalex.org/W2166637769","https://openalex.org/W2275246416","https://openalex.org/W2513669042","https://openalex.org/W2748406667","https://openalex.org/W2748416658","https://openalex.org/W2783089003","https://openalex.org/W2786387151","https://openalex.org/W2888997666","https://openalex.org/W2889231094","https://openalex.org/W2895434601","https://openalex.org/W2963093689","https://openalex.org/W2979826702","https://openalex.org/W3016140135","https://openalex.org/W3094393093","https://openalex.org/W3095319910","https://openalex.org/W3102393842","https://openalex.org/W3112188842","https://openalex.org/W3168867926","https://openalex.org/W3209059054","https://openalex.org/W4224903891","https://openalex.org/W4241891521","https://openalex.org/W4284676027","https://openalex.org/W4297841778","https://openalex.org/W4297841817","https://openalex.org/W4323655724","https://openalex.org/W4382246105","https://openalex.org/W6629726534","https://openalex.org/W6796581206","https://openalex.org/W6800875267"],"related_works":["https://openalex.org/W3174008653","https://openalex.org/W2358842429","https://openalex.org/W2765804957","https://openalex.org/W2962716343","https://openalex.org/W4288099861","https://openalex.org/W2893411096","https://openalex.org/W4224919006","https://openalex.org/W43702919","https://openalex.org/W4213400064","https://openalex.org/W4288263119"],"abstract_inverted_index":{"We":[0,46],"propose":[1],"an":[2],"approach":[3,37,73],"for":[4,61,83],"continuous":[5],"prediction":[6],"of":[7,77],"turn-taking":[8],"and":[9,65,80,87,92],"backchanneling":[10],"locations":[11],"in":[12],"spoken":[13],"dialogue":[14],"by":[15],"fusing":[16],"a":[17,22,49,84],"neural":[18],"acoustic":[19,81],"model":[20,25],"with":[21,43],"large":[23],"language":[24],"(LLM).":[26],"Experiments":[27],"on":[28],"the":[29,40,63,75],"Switchboard":[30],"human-human":[31],"conversation":[32],"dataset":[33],"demonstrate":[34],"that":[35],"our":[36],"consistently":[38],"outperforms":[39],"baseline":[41],"models":[42,82],"single":[44],"modality.":[45],"also":[47],"develop":[48],"novel":[50],"multi-task":[51],"instruction":[52],"fine-tuning":[53],"strategy":[54],"to":[55,69],"further":[56],"benefit":[57],"from":[58],"LLM-encoded":[59],"knowledge":[60],"understanding":[62],"tasks":[64],"conversational":[66,88],"contexts,":[67],"leading":[68],"additional":[70],"improvements.":[71],"Our":[72],"demonstrates":[74],"potential":[76],"combined":[78],"LLMs":[79],"more":[85],"natural":[86],"interaction":[89],"between":[90],"humans":[91],"speech-enabled":[93],"AI":[94],"agents.":[95]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
