{"id":"https://openalex.org/W2097998164","doi":"https://doi.org/10.21437/interspeech.2012-628","title":"Towards hierarchical prosodic prominence generation in TTS synthesis","display_name":"Towards hierarchical prosodic prominence generation in TTS synthesis","publication_year":2012,"publication_date":"2012-09-09","ids":{"openalex":"https://openalex.org/W2097998164","doi":"https://doi.org/10.21437/interspeech.2012-628","mag":"2097998164"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2012-628","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2012-628","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2012","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/portal/en/publications/towards-hierarchical-prosodic-prominence-generation-in-tts-synthesis(061f047d-c590-41c3-943f-b938ec6d0316).html","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063165262","display_name":"Leonardo Badino","orcid":"https://orcid.org/0000-0001-7037-5914"},"institutions":[{"id":"https://openalex.org/I30771326","display_name":"Italian Institute of Technology","ror":"https://ror.org/042t93s57","country_code":"IT","type":"facility","lineage":["https://openalex.org/I30771326"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Leonardo Badino","raw_affiliation_strings":["Istituto Italiano Di Tecnologia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Istituto Italiano Di Tecnologia","institution_ids":["https://openalex.org/I30771326"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084488961","display_name":"Robert A. Clark","orcid":"https://orcid.org/0000-0002-4892-3619"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Robert A. J. Clark","raw_affiliation_strings":["School of Informatics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060282305","display_name":"Mirjam Wester","orcid":"https://orcid.org/0000-0002-3199-0081"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mirjam Wester","raw_affiliation_strings":["School of Informatics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3269,"has_fulltext":true,"cited_by_count":32,"citation_normalized_percentile":{"value":0.8428696,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2398","last_page":"2401"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7226986885070801},{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.7121568918228149},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6926435828208923},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.634830117225647},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5825870633125305},{"id":"https://openalex.org/keywords/pitch-accent","display_name":"Pitch accent","score":0.5742453336715698},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5079482197761536},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.4977250397205353},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4573464095592499},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4417960047721863},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.35529786348342896},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11628291010856628}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7226986885070801},{"id":"https://openalex.org/C2776756274","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.7121568918228149},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6926435828208923},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.634830117225647},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5825870633125305},{"id":"https://openalex.org/C2777672088","wikidata":"https://www.wikidata.org/wiki/Q1441804","display_name":"Pitch accent","level":3,"score":0.5742453336715698},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5079482197761536},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.4977250397205353},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4573464095592499},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4417960047721863},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.35529786348342896},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11628291010856628},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2012-628","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2012-628","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2012","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:publications/061f047d-c590-41c3-943f-b938ec6d0316","is_oa":true,"landing_page_url":null,"pdf_url":"https://www.research.ed.ac.uk/portal/en/publications/towards-hierarchical-prosodic-prominence-generation-in-tts-synthesis(061f047d-c590-41c3-943f-b938ec6d0316).html","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:pure.ed.ac.uk:publications/061f047d-c590-41c3-943f-b938ec6d0316","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/061f047d-c590-41c3-943f-b938ec6d0316","pdf_url":"https://www.research.ed.ac.uk/portal/en/publications/towards-hierarchical-prosodic-prominence-generation-in-tts-synthesis(061f047d-c590-41c3-943f-b938ec6d0316).html","source":{"id":"https://openalex.org/S4306400321","display_name":"Edinburgh Research Explorer (University of Edinburgh)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98677209","host_organization_name":"University of Edinburgh","host_organization_lineage":["https://openalex.org/I98677209"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Badino, L, Clark, R A J & Wester, M 2012, Towards Hierarchical Prosodic Prominence Generation in TTS Synthesis. in INTERSPEECH 2012 13th Annual Conference of the International Speech Communication Association. International Speech Communication Association, pp. 2398-2401. < http://www.isca-speech.org/archive/archive_papers/interspeech_2012/i12_2398.pdf >","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.249.3702","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.249.3702","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cstr.inf.ed.ac.uk/downloads/publications/2012/badinoclark_IS_2012.pdf","raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:publications/061f047d-c590-41c3-943f-b938ec6d0316","is_oa":true,"landing_page_url":null,"pdf_url":"https://www.research.ed.ac.uk/portal/en/publications/towards-hierarchical-prosodic-prominence-generation-in-tts-synthesis(061f047d-c590-41c3-943f-b938ec6d0316).html","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.5899999737739563,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2097998164.pdf","grobid_xml":"https://content.openalex.org/works/W2097998164.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W397522103","https://openalex.org/W1486533484","https://openalex.org/W1494687963","https://openalex.org/W1503285781","https://openalex.org/W1539337024","https://openalex.org/W1548765444","https://openalex.org/W1562405582","https://openalex.org/W1563645159","https://openalex.org/W1568793342","https://openalex.org/W1583314545","https://openalex.org/W1594031697","https://openalex.org/W1798610767","https://openalex.org/W2079611736","https://openalex.org/W2102093296","https://openalex.org/W2117805756","https://openalex.org/W2119929864","https://openalex.org/W2127836646","https://openalex.org/W2146423738","https://openalex.org/W2147880316","https://openalex.org/W2912934387","https://openalex.org/W2950225692","https://openalex.org/W3085162807"],"related_works":["https://openalex.org/W2088008556","https://openalex.org/W2071315723","https://openalex.org/W2507083698","https://openalex.org/W2465473908","https://openalex.org/W4360877803","https://openalex.org/W49590631","https://openalex.org/W2474947928","https://openalex.org/W2967415048","https://openalex.org/W4301230705","https://openalex.org/W1489682032"],"abstract_inverted_index":{"We":[0,18],"address":[1],"the":[2,31,44,47,51,54,78,84,91,102],"problem":[3],"of":[4,10,30,46,94,104,107],"identification":[5],"(from":[6],"text)":[7,73],"and":[8,37,99],"generation":[9],"pitch":[11,35,121],"accents":[12],"in":[13],"HMM-based":[14],"English":[15],"TTS":[16],"synthesis.":[17],"show,":[19],"through":[20],"a":[21,27,58,105,112],"large":[22,28],"scale":[23],"perceptual":[24],"test,":[25],"that":[26,62,66,89],"improvement":[29],"binary":[32],"discrimination":[33],"between":[34],"accented":[36],"non-accented":[38],"words":[39,65],"has":[40],"no":[41],"effect":[42],"on":[43,77,86],"quality":[45],"speech":[48,118],"generated":[49],"by":[50],"system.":[52],"On":[53],"other":[55],"side":[56],"adding":[57],"third":[59],"accent":[60],"type":[61],"emphatically":[63],"marks":[64],"convey":[67],"\u201dcontrastive\u201d":[68],"focus":[69,123],"(automatically":[70],"identified":[71],"from":[72,111],"produces":[74],"beneficial":[75],"effects":[76],"synthesized":[79],"speech.":[80],"These":[81],"results":[82],"support":[83],"accounts":[85],"prosodic":[87,92],"prominence":[88],"consider":[90],"patterns":[93],"utterances":[95],"as":[96],"hierarchical":[97],"structured":[98],"point":[100],"out":[101],"limits":[103],"flattening":[106],"such":[108],"structure":[109],"resulting":[110],"simple":[113],"accent/non-accent":[114],"distinction.":[115],"Index":[116],"Terms:":[117],"synthesis,":[119],"HMM,":[120],"accents,":[122],"detection":[124],"1.":[125]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2019,"cited_by_count":27},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
