{"id":"https://openalex.org/W3095828473","doi":"https://doi.org/10.21437/interspeech.2020-2520","title":"Attention Forcing for Speech Synthesis","display_name":"Attention Forcing for Speech Synthesis","publication_year":2020,"publication_date":"2020-10-25","ids":{"openalex":"https://openalex.org/W3095828473","doi":"https://doi.org/10.21437/interspeech.2020-2520","mag":"3095828473"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2020-2520","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-2520","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033136699","display_name":"Qingyun Dou","orcid":"https://orcid.org/0000-0001-8479-7210"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Qingyun Dou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089802621","display_name":"Joshua Efiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joshua Efiong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5050766679","display_name":"Mark Gales","orcid":"https://orcid.org/0000-0002-5311-8219"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mark J.F. Gales","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5033136699"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8118,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.79637452,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"4014","last_page":"4018"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/forcing","display_name":"Forcing (mathematics)","score":0.8154362440109253},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6318511962890625},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4466870427131653},{"id":"https://openalex.org/keywords/climatology","display_name":"Climatology","score":0.1751454770565033},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.10667568445205688}],"concepts":[{"id":"https://openalex.org/C197115733","wikidata":"https://www.wikidata.org/wiki/Q1003136","display_name":"Forcing (mathematics)","level":2,"score":0.8154362440109253},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6318511962890625},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4466870427131653},{"id":"https://openalex.org/C49204034","wikidata":"https://www.wikidata.org/wiki/Q52139","display_name":"Climatology","level":1,"score":0.1751454770565033},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.10667568445205688}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2020-2520","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2020-2520","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2020","raw_type":"proceedings-article"},{"id":"pmh:oai:generic.eprints.org:1263931","is_oa":false,"landing_page_url":"http://publications.eng.cam.ac.uk/1263931/","pdf_url":null,"source":{"id":"https://openalex.org/S4406922847","display_name":"Cambridge University Engineering Department Publications Database","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W648786980","https://openalex.org/W854541894","https://openalex.org/W1924770834","https://openalex.org/W2042360461","https://openalex.org/W2524251915","https://openalex.org/W2542835211","https://openalex.org/W2794490148","https://openalex.org/W2803229097","https://openalex.org/W2912613132","https://openalex.org/W2962762462","https://openalex.org/W2962834107","https://openalex.org/W2964243274","https://openalex.org/W2964308564","https://openalex.org/W2970730223"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2971214053","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2021517900","https://openalex.org/W2376932109"],"abstract_inverted_index":{"Auto-regressive":[0],"sequence-to-sequence":[1],"models":[2,18,126],"with":[3,28,90,168],"attention":[4,84,133,158,186],"mechanisms":[5],"have":[6,52],"achieved":[7],"state-of-the-art":[8],"performance":[9],"in":[10,197],"various":[11],"tasks":[12,177],"including":[13],"speech":[14,155,198],"synthesis.":[15],"Training":[16],"these":[17,70],"can":[19,47],"be":[20,43],"difficult.":[21],"The":[22],"standard":[23,115],"approach":[24,99],"guides":[25,87],"a":[26,74,108,111,148],"model":[27,89,162],"the":[29,38,62,88,91,101,105,118,129,138,143,154,161,169,190],"reference":[30,96,139],"output":[31,40,64,93,130,140,165],"history":[32,41,94,141],"during":[33,36],"training.":[34],"However":[35],"synthesis":[37],"generated":[39,63,92],"must":[42],"used.":[44],"This":[45,81,98],"mismatch":[46,103],"impact":[48],"performance.":[49],"Several":[50],"approaches":[51,71],"been":[53],"proposed":[54],"to":[55,124,142,163],"handle":[56],"this,":[57],"normally":[58],"by":[59],"selectively":[60],"using":[61,147],"history.":[65,131],"To":[66],"make":[67],"training":[68,116,180],"stable,":[69],"often":[72,122],"require":[73],"heuristic":[75],"schedule":[76,109],"or":[77,110],"an":[78],"auxiliary":[79],"classifier.":[80,112],"paper":[82],"introduces":[83],"forcing,":[85],"which":[86,152,171],"and":[95,193],"attention.":[97],"reduces":[100],"training-evaluation":[102],"without":[104],"need":[106],"for":[107,114,174],"Additionally,":[113],"approaches,":[117],"frame":[119,150,191],"rate":[120],"is":[121,172],"reduced":[123],"prevent":[125],"from":[127],"copying":[128],"As":[132],"forcing":[134,159,187],"does":[135],"not":[136],"feed":[137],"model,":[144],"it":[145],"allows":[146,160,188],"higher":[149],"rate,":[151,192],"improves":[153],"quality.":[156,199],"Finally,":[157],"generate":[164],"sequences":[166],"aligned":[167],"references,":[170],"important":[173],"some":[175],"down-stream":[176],"such":[178],"as":[179],"neural":[181],"vocoders.":[182],"Experiments":[183],"show":[184],"that":[185],"doubling":[189],"yields":[194],"significant":[195],"gain":[196]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
