{"id":"https://openalex.org/W4392902885","doi":"https://doi.org/10.1109/icassp48485.2024.10446241","title":"Multi-Task Learning for Front-End Text Processing in TTS","display_name":"Multi-Task Learning for Front-End Text Processing in TTS","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392902885","doi":"https://doi.org/10.1109/icassp48485.2024.10446241"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446241","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446241","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052061361","display_name":"Wonjune Kang","orcid":"https://orcid.org/0000-0001-5260-9094"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wonjune Kang","raw_affiliation_strings":["Massachusetts Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100311585","display_name":"Yun Wang","orcid":"https://orcid.org/0009-0001-9348-7879"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yun Wang","raw_affiliation_strings":["AI at Meta"],"affiliations":[{"raw_affiliation_string":"AI at Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100415451","display_name":"Shun Zhang","orcid":"https://orcid.org/0009-0004-3319-6596"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shun Zhang","raw_affiliation_strings":["AI at Meta"],"affiliations":[{"raw_affiliation_string":"AI at Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006161509","display_name":"Arthur Hinsvark","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arthur Hinsvark","raw_affiliation_strings":["AI at Meta"],"affiliations":[{"raw_affiliation_string":"AI at Meta","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100734672","display_name":"Qing He","orcid":"https://orcid.org/0000-0001-8833-5398"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qing He","raw_affiliation_strings":["AI at Meta"],"affiliations":[{"raw_affiliation_string":"AI at Meta","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5052061361"],"corresponding_institution_ids":["https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":0.3626,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.61645273,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"10796","last_page":"10800"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8454357385635376},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6152648329734802},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.612361490726471},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6061203479766846},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5574808716773987},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4795267581939697},{"id":"https://openalex.org/keywords/front-and-back-ends","display_name":"Front and back ends","score":0.46698421239852905},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.44747987389564514},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.43002229928970337},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36850178241729736}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8454357385635376},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6152648329734802},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.612361490726471},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6061203479766846},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5574808716773987},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4795267581939697},{"id":"https://openalex.org/C53016008","wikidata":"https://www.wikidata.org/wiki/Q620167","display_name":"Front and back ends","level":2,"score":0.46698421239852905},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.44747987389564514},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.43002229928970337},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36850178241729736},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446241","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446241","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4699999988079071}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1589180892","https://openalex.org/W1836465849","https://openalex.org/W2008652694","https://openalex.org/W2090755665","https://openalex.org/W2117130368","https://openalex.org/W2164107060","https://openalex.org/W2164280277","https://openalex.org/W2517566275","https://openalex.org/W2604561121","https://openalex.org/W2745788963","https://openalex.org/W2747175885","https://openalex.org/W2794592252","https://openalex.org/W2890774895","https://openalex.org/W2913340405","https://openalex.org/W2924677654","https://openalex.org/W2945656493","https://openalex.org/W2948947170","https://openalex.org/W2963336393","https://openalex.org/W2963854351","https://openalex.org/W3016224472","https://openalex.org/W3154516348","https://openalex.org/W3185051381","https://openalex.org/W3212496399","https://openalex.org/W4221166186","https://openalex.org/W4295276571","https://openalex.org/W4384918448","https://openalex.org/W4385245566","https://openalex.org/W4392903542","https://openalex.org/W6684316753","https://openalex.org/W6685145238","https://openalex.org/W6697152374","https://openalex.org/W6729005282","https://openalex.org/W6753030114","https://openalex.org/W6757817989","https://openalex.org/W6768021236","https://openalex.org/W6771137081","https://openalex.org/W6809893488","https://openalex.org/W6854866820"],"related_works":["https://openalex.org/W2591697403","https://openalex.org/W2944728705","https://openalex.org/W2904022177","https://openalex.org/W2359348847","https://openalex.org/W2032233321","https://openalex.org/W3011538607","https://openalex.org/W4294432981","https://openalex.org/W4321441197","https://openalex.org/W3121970507","https://openalex.org/W2110028391"],"abstract_inverted_index":{"We":[0,49,141],"propose":[1],"a":[2,17,34,38,52,120,125,134,156],"multi-task":[3,79],"learning":[4],"(MTL)":[5],"model":[6,55,89],"for":[7,133],"jointly":[8],"performing":[9],"three":[10,93],"tasks":[11,94],"that":[12,40,86,143],"are":[13],"commonly":[14,157],"solved":[15],"in":[16,130],"text-to-speech":[18],"(TTS)":[19],"front-end:":[20],"text":[21],"normalization":[22],"(TN),":[23],"part-of-speech":[24],"(POS)":[25],"tagging,":[26],"and":[27,61,64,138],"homograph":[28],"disambiguation":[29],"(HD).":[30],"Our":[31],"framework":[32],"utilizes":[33],"tree-like":[35],"structure":[36],"with":[37],"trunk":[39],"learns":[41],"shared":[42],"representations,":[43],"followed":[44],"by":[45],"separate":[46],"task-specific":[47],"heads.":[48],"further":[50],"incorporate":[51],"pre-trained":[53],"language":[54],"to":[56,67,74,101],"utilize":[57],"its":[58,70],"built-in":[59],"lexical":[60],"contextual":[62],"knowledge,":[63],"study":[65],"how":[66],"best":[68],"use":[69],"embeddings":[71],"so":[72],"as":[73],"most":[75],"effectively":[76],"benefit":[77],"our":[78,87,114],"model.":[80],"Through":[81],"task-wise":[82],"ablations,":[83],"we":[84,118],"show":[85],"full":[88],"trained":[90,103],"on":[91,104],"all":[92],"achieves":[95],"the":[96,111],"strongest":[97],"overall":[98],"performance":[99,152],"compared":[100],"models":[102],"individual":[105],"or":[106],"sub-combinations":[107],"of":[108,113,128,136],"tasks,":[109],"confirming":[110],"advantages":[112],"MTL":[115],"framework.":[116],"Finally,":[117],"introduce":[119],"new":[121],"HD":[122,151],"dataset":[123,146],"containing":[124],"balanced":[126],"number":[127],"sentences":[129],"diverse":[131],"contexts":[132],"variety":[135],"homographs":[137],"their":[139],"pronunciations.":[140],"demonstrate":[142],"incorporating":[144],"this":[145],"into":[147],"training":[148],"significantly":[149],"improves":[150],"over":[153],"only":[154],"using":[155],"used,":[158],"but":[159],"imbalanced,":[160],"pre-existing":[161],"dataset.":[162]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
