{"id":"https://openalex.org/W4285044837","doi":"https://doi.org/10.21437/interspeech.2022-10603","title":"End-to-End Binaural Speech Synthesis","display_name":"End-to-End Binaural Speech Synthesis","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4285044837","doi":"https://doi.org/10.21437/interspeech.2022-10603"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-10603","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-10603","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000437338","display_name":"Wen\u2010Chin Huang","orcid":"https://orcid.org/0000-0003-2342-9099"},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Wen Chin Huang","raw_affiliation_strings":["Nagoya University, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nagoya University, Japan","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003700236","display_name":"Dejan Markovi\u0107","orcid":"https://orcid.org/0000-0002-6744-7531"},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dejan Markovic","raw_affiliation_strings":["Meta Reality Labs Research, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research, USA","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112845052","display_name":"Alexander Richard","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Richard","raw_affiliation_strings":["Meta Reality Labs Research, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research, USA","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072548521","display_name":"Israel D. Gebru","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Israel Dejene Gebru","raw_affiliation_strings":["Meta Reality Labs Research, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research, USA","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059034612","display_name":"Anjali Kondur Menon","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anjali Menon","raw_affiliation_strings":["Meta Reality Labs Research, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research, USA","institution_ids":["https://openalex.org/I4210128585"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.0382,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.78160615,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1218","last_page":"1222"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9869999885559082,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6708949208259583},{"id":"https://openalex.org/keywords/binaural-recording","display_name":"Binaural recording","score":0.6339701414108276},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5406003594398499},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5297093391418457},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.43814581632614136},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.14492961764335632}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6708949208259583},{"id":"https://openalex.org/C201247586","wikidata":"https://www.wikidata.org/wiki/Q5612967","display_name":"Binaural recording","level":2,"score":0.6339701414108276},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5406003594398499},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5297093391418457},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.43814581632614136},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14492961764335632}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-10603","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-10603","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W124275677","https://openalex.org/W1481955708","https://openalex.org/W1497371565","https://openalex.org/W2020883660","https://openalex.org/W2101298784","https://openalex.org/W2143270321","https://openalex.org/W2519091744","https://openalex.org/W2616802817","https://openalex.org/W2760103357","https://openalex.org/W2775336875","https://openalex.org/W2787579267","https://openalex.org/W2935711438","https://openalex.org/W2963799213","https://openalex.org/W2970006822","https://openalex.org/W2970906079","https://openalex.org/W2971074500","https://openalex.org/W3015268401","https://openalex.org/W3034742263","https://openalex.org/W3036843665","https://openalex.org/W3043163188","https://openalex.org/W3092028330","https://openalex.org/W3138953166","https://openalex.org/W3140429000","https://openalex.org/W3160970338","https://openalex.org/W3163662330","https://openalex.org/W3215615641","https://openalex.org/W4205225513","https://openalex.org/W4205788663","https://openalex.org/W4225271852","https://openalex.org/W4244179866"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2766995619","https://openalex.org/W4224270619","https://openalex.org/W2579722767","https://openalex.org/W2151749779","https://openalex.org/W2168148781","https://openalex.org/W1991848873","https://openalex.org/W2026165661"],"abstract_inverted_index":{"In":[0],"this":[1],"work,":[2],"we":[3,88],"present":[4],"an":[5,53,61,103],"end-to-end":[6],"binaural":[7,20,63],"speech":[8,27],"synthesis":[9],"system":[10,59],"that":[11,22,73],"combines":[12],"a":[13,18,41,69],"low-bitrate":[14],"audio":[15],"codec":[16],"with":[17,47,65],"powerful":[19],"decoder":[21],"is":[23,40],"capable":[24],"of":[25,92],"accurate":[26],"binauralization":[28],"while":[29],"faithfully":[30],"reconstructing":[31],"environmental":[32],"factors":[33],"like":[34],"ambient":[35],"noise":[36],"or":[37],"reverb.The":[38],"network":[39],"modified":[42],"vectorquantized":[43],"variational":[44],"autoencoder,":[45],"trained":[46],"several":[48],"carefully":[49],"designed":[50],"objectives,":[51],"including":[52],"adversarial":[54,94],"loss.We":[55],"evaluate":[56],"the":[57,74,78,90,93],"proposed":[58,75],"on":[60],"internal":[62],"dataset":[64],"objective":[66],"metrics":[67],"and":[68],"perceptual":[70],"study.Results":[71],"show":[72],"approach":[76],"matches":[77],"ground":[79],"truth":[80],"data":[81],"more":[82],"closely":[83],"than":[84],"previous":[85],"methods.In":[86],"particular,":[87],"demonstrate":[89],"capability":[91],"loss":[95],"in":[96],"capturing":[97],"environment":[98],"effects":[99],"needed":[100],"to":[101],"create":[102],"authentic":[104],"auditory":[105],"scene.":[106]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
