{"id":"https://openalex.org/W4401452051","doi":"https://doi.org/10.1109/lsp.2024.3440956","title":"Very Low Complexity Speech Synthesis Using Framewise Autoregressive GAN (FARGAN) With Pitch Prediction","display_name":"Very Low Complexity Speech Synthesis Using Framewise Autoregressive GAN (FARGAN) With Pitch Prediction","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4401452051","doi":"https://doi.org/10.1109/lsp.2024.3440956"},"language":"en","primary_location":{"id":"doi:10.1109/lsp.2024.3440956","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2024.3440956","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021269794","display_name":"Jean-Marc Valin","orcid":"https://orcid.org/0000-0002-9883-6927"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jean-Marc Valin","raw_affiliation_strings":["Xiph.Org Foundation, Jaffrey, NH, USA"],"raw_orcid":"https://orcid.org/0000-0002-9883-6927","affiliations":[{"raw_affiliation_string":"Xiph.Org Foundation, Jaffrey, NH, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102436093","display_name":"Ahmed Mustafa","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ahmed Mustafa","raw_affiliation_strings":["Amazon Web Services, Palo Alto, CA, USA"],"raw_orcid":"https://orcid.org/0009-0005-4820-8446","affiliations":[{"raw_affiliation_string":"Amazon Web Services, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042244085","display_name":"Jan B\u00fcthe","orcid":"https://orcid.org/0000-0001-5872-4325"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jan B\u00fcthe","raw_affiliation_strings":["Amazon Web Services, Palo Alto, CA, USA"],"raw_orcid":"https://orcid.org/0009-0003-9684-1567","affiliations":[{"raw_affiliation_string":"Amazon Web Services, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8328,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.87435577,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"31","issue":null,"first_page":"2115","last_page":"2119"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.7567166090011597},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6617405414581299},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6335269212722778},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.5177564024925232},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.47119662165641785},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.39027658104896545},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2617400288581848},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.15852516889572144}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.7567166090011597},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6617405414581299},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6335269212722778},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.5177564024925232},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.47119662165641785},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.39027658104896545},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2617400288581848},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.15852516889572144}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2024.3440956","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2024.3440956","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W2013020033","https://openalex.org/W2016589492","https://openalex.org/W2519091744","https://openalex.org/W2593414223","https://openalex.org/W2750167318","https://openalex.org/W2775336875","https://openalex.org/W2895654193","https://openalex.org/W2963091184","https://openalex.org/W2964243274","https://openalex.org/W2970006822","https://openalex.org/W2985308740","https://openalex.org/W2998498479","https://openalex.org/W3025844872","https://openalex.org/W3092028330","https://openalex.org/W3092791109","https://openalex.org/W3096468295","https://openalex.org/W3160077247","https://openalex.org/W3161236344","https://openalex.org/W3197273793","https://openalex.org/W3198020407","https://openalex.org/W4221155904","https://openalex.org/W4226421894","https://openalex.org/W4375868823","https://openalex.org/W4392903975","https://openalex.org/W6713478990","https://openalex.org/W6732429163","https://openalex.org/W6748409065","https://openalex.org/W6767111847","https://openalex.org/W6777664437","https://openalex.org/W6777781272","https://openalex.org/W6778625279","https://openalex.org/W6778672582","https://openalex.org/W6779192484","https://openalex.org/W6783867762","https://openalex.org/W6802838302","https://openalex.org/W6838843145"],"related_works":["https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2171218219","https://openalex.org/W1972271943","https://openalex.org/W2150410159","https://openalex.org/W1911859126","https://openalex.org/W2189341500","https://openalex.org/W2120730869","https://openalex.org/W2166699153","https://openalex.org/W2541680182"],"abstract_inverted_index":{"Neural":[0],"vocoders":[1],"are":[2],"now":[3],"being":[4],"used":[5],"in":[6,58],"a":[7],"wide":[8],"range":[9],"of":[10,16,50,93],"speech":[11,57],"processing":[12],"applications.":[13],"In":[14,38],"many":[15],"those":[17],"applications,":[18],"the":[19,23,62,70],"vocoder":[20,46,75],"can":[21,32,76],"be":[22],"most":[24],"complex":[25],"component,":[26],"so":[27],"finding":[28],"lower":[29,82],"complexity":[30,83],"algorithms":[31],"lead":[33],"to":[34,54],"significant":[35],"practical":[36],"benefits.":[37],"this":[39],"work,":[40],"we":[41],"propose":[42],"FARGAN,":[43],"an":[44],"autoregressive":[45],"that":[47,69,92],"takes":[48],"advantage":[49],"long-term":[51],"pitch":[52],"prediction":[53],"synthesize":[55],"high-quality":[56],"small":[59],"subframes,":[60],"without":[61],"need":[63],"for":[64],"teacher-forcing.":[65],"Experimental":[66],"results":[67],"show":[68],"proposed":[71],"600":[72],"MFLOPS":[73],"FARGAN":[74],"achieve":[77],"both":[78],"higher":[79],"quality":[80,89],"and":[81],"than":[84],"existing":[85,94],"low-complexity":[86],"vocoders.":[87,96],"The":[88],"even":[90],"matches":[91],"higher-complexity":[95]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
