{"id":"https://openalex.org/W4385569627","doi":"https://doi.org/10.1109/taslp.2023.3301212","title":"Parallel Synthesis for Autoregressive Speech Generation","display_name":"Parallel Synthesis for Autoregressive Speech Generation","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4385569627","doi":"https://doi.org/10.1109/taslp.2023.3301212"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3301212","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3301212","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101935970","display_name":"Po-chun Hsu","orcid":"https://orcid.org/0000-0002-6104-3907"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Po-chun Hsu","raw_affiliation_strings":["Graduate Institute of Communication Engineering, College of Electrical Engineering and Computer Science, National Taiwan University, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Communication Engineering, College of Electrical Engineering and Computer Science, National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084868332","display_name":"Da-Rong Liu","orcid":"https://orcid.org/0009-0005-0090-7408"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Da-rong Liu","raw_affiliation_strings":["Graduate Institute of Communication Engineering, College of Electrical Engineering and Computer Science, National Taiwan University, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Communication Engineering, College of Electrical Engineering and Computer Science, National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018050919","display_name":"Andy T. Liu","orcid":"https://orcid.org/0000-0002-2502-3992"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Andy T. Liu","raw_affiliation_strings":["Graduate Institute of Communication Engineering, College of Electrical Engineering and Computer Science, National Taiwan University, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Communication Engineering, College of Electrical Engineering and Computer Science, National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040508737","display_name":"Hung-yi Lee","orcid":"https://orcid.org/0000-0002-9654-5747"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hung-yi Lee","raw_affiliation_strings":["Graduate Institute of Communication Engineering, College of Electrical Engineering and Computer Science, National Taiwan University, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Communication Engineering, College of Electrical Engineering and Computer Science, National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101935970"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":1.049,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.81083828,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"31","issue":null,"first_page":"3095","last_page":"3111"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.9296236038208008},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7212486267089844},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6154226064682007},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5853895545005798},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4397771954536438},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.43405455350875854},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4181773066520691},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3093366026878357},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20565789937973022},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1145218014717102}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.9296236038208008},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7212486267089844},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6154226064682007},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5853895545005798},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4397771954536438},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.43405455350875854},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4181773066520691},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3093366026878357},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20565789937973022},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1145218014717102}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2023.3301212","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3301212","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":122,"referenced_works":["https://openalex.org/W95152782","https://openalex.org/W1494895475","https://openalex.org/W1521637132","https://openalex.org/W1522301498","https://openalex.org/W2102003408","https://openalex.org/W2107860279","https://openalex.org/W2111284386","https://openalex.org/W2118774185","https://openalex.org/W2118806033","https://openalex.org/W2120847449","https://openalex.org/W2143280761","https://openalex.org/W2148770129","https://openalex.org/W2172065531","https://openalex.org/W2395578248","https://openalex.org/W2423557781","https://openalex.org/W2471520273","https://openalex.org/W2519091744","https://openalex.org/W2587284713","https://openalex.org/W2598638573","https://openalex.org/W2767206889","https://openalex.org/W2784918340","https://openalex.org/W2785516183","https://openalex.org/W2805217154","https://openalex.org/W2885820941","https://openalex.org/W2888169323","https://openalex.org/W2889606145","https://openalex.org/W2890983311","https://openalex.org/W2912237252","https://openalex.org/W2945478979","https://openalex.org/W2947445680","https://openalex.org/W2953318193","https://openalex.org/W2954386831","https://openalex.org/W2963091184","https://openalex.org/W2963103134","https://openalex.org/W2963300588","https://openalex.org/W2963434219","https://openalex.org/W2963542120","https://openalex.org/W2963609956","https://openalex.org/W2963654953","https://openalex.org/W2963830550","https://openalex.org/W2963975282","https://openalex.org/W2964041258","https://openalex.org/W2964243274","https://openalex.org/W2969937244","https://openalex.org/W2970006822","https://openalex.org/W2993118648","https://openalex.org/W2994373303","https://openalex.org/W2996797022","https://openalex.org/W2999160446","https://openalex.org/W3008758407","https://openalex.org/W3015338123","https://openalex.org/W3033411150","https://openalex.org/W3034363136","https://openalex.org/W3034794073","https://openalex.org/W3034999214","https://openalex.org/W3092028330","https://openalex.org/W3095419383","https://openalex.org/W3095459301","https://openalex.org/W3095851005","https://openalex.org/W3096702751","https://openalex.org/W3097538987","https://openalex.org/W3097828251","https://openalex.org/W3097895838","https://openalex.org/W3100270690","https://openalex.org/W3103913581","https://openalex.org/W3105124182","https://openalex.org/W3109064156","https://openalex.org/W3122318757","https://openalex.org/W3123097577","https://openalex.org/W3129651364","https://openalex.org/W3140429000","https://openalex.org/W3141224548","https://openalex.org/W3144035034","https://openalex.org/W3154504973","https://openalex.org/W3161704465","https://openalex.org/W3162770051","https://openalex.org/W3163662330","https://openalex.org/W3197294703","https://openalex.org/W4225304461","https://openalex.org/W4231807801","https://openalex.org/W4283388932","https://openalex.org/W4283817920","https://openalex.org/W4286899907","https://openalex.org/W4287694050","https://openalex.org/W4289305009","https://openalex.org/W4289761690","https://openalex.org/W4292779060","https://openalex.org/W4294619240","https://openalex.org/W4298580827","https://openalex.org/W4320013936","https://openalex.org/W4377079846","https://openalex.org/W4385245566","https://openalex.org/W6603838645","https://openalex.org/W6631190155","https://openalex.org/W6711777497","https://openalex.org/W6733471323","https://openalex.org/W6739901393","https://openalex.org/W6746208923","https://openalex.org/W6748409065","https://openalex.org/W6752910514","https://openalex.org/W6753855596","https://openalex.org/W6756159577","https://openalex.org/W6762533536","https://openalex.org/W6766837696","https://openalex.org/W6767111847","https://openalex.org/W6771024825","https://openalex.org/W6771719367","https://openalex.org/W6773094808","https://openalex.org/W6774645763","https://openalex.org/W6776390925","https://openalex.org/W6778823374","https://openalex.org/W6778883912","https://openalex.org/W6781251213","https://openalex.org/W6782760101","https://openalex.org/W6783182287","https://openalex.org/W6783867762","https://openalex.org/W6786325012","https://openalex.org/W6802838302","https://openalex.org/W6839643428","https://openalex.org/W6843673214","https://openalex.org/W6917585676","https://openalex.org/W6943912960"],"related_works":["https://openalex.org/W2529301793","https://openalex.org/W2384121599","https://openalex.org/W2038083449","https://openalex.org/W2562096895","https://openalex.org/W2333799855","https://openalex.org/W3177678247","https://openalex.org/W1999617572","https://openalex.org/W2944572343","https://openalex.org/W2351687372","https://openalex.org/W4307784074"],"abstract_inverted_index":{"Autoregressive":[0],"neural":[1],"vocoders":[2],"have":[3],"achieved":[4],"outstanding":[5],"performance":[6],"and":[7,18,76,81,111,223,260,270],"are":[8],"widely":[9],"used":[10],"in":[11,74,99,155,175,186],"speech":[12,71,122,145,248,279],"synthesis":[13,52],"tasks":[14],"such":[15],"as":[16],"text-to-speech":[17],"voice":[19],"conversion.":[20],"An":[21],"autoregressive":[22,92,108,113,171,236,259],"vocoder":[23],"predicts":[24],"a":[25,87,100,121,135,212],"sample":[26,217],"at":[27,34],"some":[28],"time":[29,36,53,72,101,180],"step":[30],"conditioned":[31,137],"on":[32,138,230],"those":[33],"previous":[35],"steps.":[37],"Though":[38],"it":[39],"can":[40,146,246],"generate":[41],"highly":[42],"natural":[43],"human":[44],"speech,":[45],"the":[46,51,56,69,91,103,139,165,170,179,182,187,195,200,231,234,243,257,263],"iterative":[47],"generation":[48,109,114],"inevitably":[49],"makes":[50],"proportional":[54,193],"to":[55,60,67,116,173,194,199,216],"utterance":[57,123],"length,":[58],"leading":[59],"low":[61],"efficiency.":[62],"Many":[63],"works":[64],"were":[65],"dedicated":[66],"generating":[68],"whole":[70],"sequence":[73],"parallel":[75],"then":[77,147],"proposed":[78,86,104,132,188,235,244,264],"GAN-based,":[79],"flow-based,":[80],"score-based":[82],"vocoders.":[83],"This":[84],"paper":[85],"new":[88],"thought":[89],"for":[90],"generation.":[93],"Instead":[94],"of":[95,184,202,233],"iteratively":[96,163],"predicting":[97],"samples":[98],"sequence,":[102],"model":[105,133,189,245,265],"performs":[106],"frequency-wise":[107],"(FAR)":[110],"bit-wise":[112],"(BAR)":[115],"synthesize":[117,247],"speech.":[118],"In":[119],"FAR,":[120],"is":[124,161,190,207,214,227],"first":[125,166],"split":[126],"into":[127],"different":[128],"frequency":[129],"subbands.":[130,153],"The":[131,204,238],"generates":[134],"subband":[136],"previously":[140],"generated":[141,152,162],"one.":[142],"A":[143],"full-band":[144],"be":[148],"reconstructed":[149],"from":[150,164,220,282],"these":[151],"Similarly,":[154],"BAR,":[156],"an":[157],"8-bit":[158],"quantized":[159],"signal":[160],"bit.":[167],"By":[168],"redesigning":[169],"method":[172],"compute":[174],"domains":[176],"other":[177],"than":[178,250],"domain,":[181],"number":[183,201],"iterations":[185],"no":[191],"longer":[192],"utterance's":[196],"length":[197],"but":[198],"subbands/bits.":[203],"inference":[205],"efficiency":[206],"hence":[208],"significantly":[209],"increased.":[210],"Besides,":[211],"post-filter":[213],"employed":[215],"audio":[218],"signals":[219],"output":[221],"posteriors,":[222],"its":[224],"training":[225],"objective":[226],"designed":[228],"based":[229],"characteristics":[232],"methods.":[237],"experimental":[239],"results":[240,269],"show":[241],"that":[242],"faster":[249],"real-time":[251],"without":[252],"GPU":[253],"acceleration.":[254],"Compared":[255],"with":[256],"baseline":[258],"non-autoregressive":[261],"vocoders,":[262],"achieves":[266],"better":[267],"MUSHRA":[268],"shows":[271],"good":[272],"generalization":[273],"ability":[274],"while":[275],"synthesizing":[276],"44":[277],"kHz":[278],"or":[280],"utterances":[281],"unseen":[283],"speakers.":[284]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
