{"id":"https://openalex.org/W4404261856","doi":"https://doi.org/10.1162/tacl.a.628","title":"VoiceBench: Benchmarking LLM-Based Voice Assistants","display_name":"VoiceBench: Benchmarking LLM-Based Voice Assistants","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W4404261856","doi":"https://doi.org/10.1162/tacl.a.628"},"language":"en","primary_location":{"id":"doi:10.1162/tacl.a.628","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl.a.628","pdf_url":null,"source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1162/tacl.a.628","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100333357","display_name":"Yiming Chen","orcid":"https://orcid.org/0009-0002-3658-2754"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Yiming Chen","raw_affiliation_strings":["National University of Singapore, Singapore. yiming.chen@u.nus.edu"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore. yiming.chen@u.nus.edu","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060201191","display_name":"Xianghu Yue","orcid":"https://orcid.org/0000-0003-3527-6034"},"institutions":[{"id":"https://openalex.org/I2799321569","display_name":"Tianjin Conservatory of Music","ror":"https://ror.org/039dz9590","country_code":"CN","type":"education","lineage":["https://openalex.org/I2799321569"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianghu Yue","raw_affiliation_strings":["Tianjin University, China. yuexianghu@tju.edu.cn"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tianjin University, China. yuexianghu@tju.edu.cn","institution_ids":["https://openalex.org/I2799321569"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115595935","display_name":"Chen Zhang","orcid":"https://orcid.org/0000-0003-0988-8723"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Chen Zhang","raw_affiliation_strings":["National University of Singapore, Singapore. chen_zhang@u.nus.edu"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore. chen_zhang@u.nus.edu","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101856962","display_name":"Xiaoxue Gao","orcid":"https://orcid.org/0000-0003-1920-5228"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xiaoxue Gao","raw_affiliation_strings":["I2R, Agency for Science, Technology, and Research (A*STAR), Singapore. Gao_Xiaoxue@a-star.edu.sg"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"I2R, Agency for Science, Technology, and Research (A*STAR), Singapore. Gao_Xiaoxue@a-star.edu.sg","institution_ids":["https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111887396","display_name":"Robby T. Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Robby T. Tan","raw_affiliation_strings":["National University of Singapore, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I4210099586","display_name":"Shenzhen Research Institute of Big Data","ror":"https://ror.org/00z1gwf89","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210099586"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["School of Artificial Intelligence, The Chinese University of Hong Kong, Shenzhen, China","Shenzhen Research Institute of Big Data, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]},{"raw_affiliation_string":"Shenzhen Research Institute of Big Data, China","institution_ids":["https://openalex.org/I4210099586"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100333357"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":24.3498,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.97521034,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"14","issue":null,"first_page":"378","last_page":"398"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.9635000228881836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.9635000228881836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9596999883651733,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9532999992370605,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.8902702331542969},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4316590428352356},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.36428314447402954},{"id":"https://openalex.org/keywords/marketing","display_name":"Marketing","score":0.09179946780204773}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.8902702331542969},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4316590428352356},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.36428314447402954},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.09179946780204773}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1162/tacl.a.628","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl.a.628","pdf_url":null,"source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2410.17196","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.17196","pdf_url":"https://arxiv.org/pdf/2410.17196","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.17196","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.17196","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl.a.628","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl.a.628","pdf_url":null,"source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4238897586","https://openalex.org/W435179959","https://openalex.org/W2619091065","https://openalex.org/W2059640416","https://openalex.org/W1490753184","https://openalex.org/W2284465472","https://openalex.org/W2291782699"],"abstract_inverted_index":{"Abstract":[0],"Recent":[1],"advancements":[2],"in":[3,98,103,139],"large":[4],"language":[5],"models":[6,129],"(LLMs)":[7],"like":[8],"GPT-4o":[9],"have":[10],"enabled":[11],"real-time":[12],"speech":[13,35],"interactions":[14,36],"through":[15],"LLM-based":[16,55,126],"voice":[17,56,127],"assistants,":[18],"offering":[19],"an":[20],"improved":[21],"user":[22],"experience":[23],"over":[24],"text-based":[25],"interactions.":[26],"However,":[27],"a":[28],"suitable":[29],"benchmark":[30,50],"to":[31,53,79],"rigorously":[32],"evaluate":[33],"such":[34,116],"systems":[37],"is":[38],"currently":[39],"lacking.":[40],"To":[41],"bridge":[42],"this":[43,140],"gap,":[44],"we":[45],"introduce":[46],"VoiceBench,":[47],"the":[48,122],"first":[49],"specifically":[51],"designed":[52],"assess":[54,80],"assistants.":[57],"VoiceBench":[58,92],"comprises":[59],"6,783":[60],"synthetic":[61],"and":[62,88,113,130,137],"real":[63],"spoken":[64,99],"instructions":[65,75],"recorded":[66],"from":[67],"diverse":[68],"speakers":[69],"across":[70],"eight":[71],"distinct":[72],"tasks.":[73],"These":[74],"are":[76],"meticulously":[77],"crafted":[78],"three":[81],"crucial":[82],"capability":[83],"areas:":[84],"general":[85],"knowledge,":[86],"instruction-following,":[87],"safety":[89],"compliance.":[90],"Furthermore,":[91],"systematically":[93],"incorporates":[94],"realistic":[95],"variations":[96],"common":[97],"interactions,":[100],"including":[101],"differences":[102],"speaker":[104],"characteristics":[105],"(e.g.,":[106,111],"accents),":[107],"heterogeneous":[108],"environmental":[109],"conditions":[110],"reverberation),":[112],"content":[114],"complexities":[115],"as":[117],"mispronunciations.":[118],"Extensive":[119],"experiments":[120],"reveal":[121],"limitations":[123],"of":[124],"current":[125],"assistant":[128],"offer":[131],"valuable":[132],"insights":[133],"for":[134],"future":[135],"research":[136],"development":[138],"field.1":[141]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
