{"id":"https://openalex.org/W2028051561","doi":"https://doi.org/10.1109/iscslp.2012.6423489","title":"Perceptual clustering based unit selection optimization for concatenative text-to-speech synthesis","display_name":"Perceptual clustering based unit selection optimization for concatenative text-to-speech synthesis","publication_year":2012,"publication_date":"2012-12-01","ids":{"openalex":"https://openalex.org/W2028051561","doi":"https://doi.org/10.1109/iscslp.2012.6423489","mag":"2028051561"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp.2012.6423489","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2012.6423489","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 8th International Symposium on Chinese Spoken Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031594049","display_name":"Tao Jiang","orcid":"https://orcid.org/0000-0002-9406-5061"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Tao Jiang","raw_affiliation_strings":["Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Graduate School at Shenzhen, Tsinghua University, Shenzhen, China","Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems Graduate School at Shenzhen, Tsinghua University, 518055, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems, Graduate School at Shenzhen, Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]},{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems Graduate School at Shenzhen, Tsinghua University, 518055, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102869280","display_name":"Zhiyong Wu","orcid":"https://orcid.org/0000-0001-8533-0524"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Zhiyong Wu","raw_affiliation_strings":["Tsinghua University, Beijing, Beijing, CN","Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems Graduate School at Shenzhen, Tsinghua University, 518055, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, Beijing, CN","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems Graduate School at Shenzhen, Tsinghua University, 518055, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100405572","display_name":"Jia Jia","orcid":"https://orcid.org/0000-0002-7336-4003"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jia Jia","raw_affiliation_strings":["Tsinghua National Laboratory for Information Science and Technology (TNList), Department of Computer Science and Technology, Tsinghua University, Beijing, China","Tsinghua National Laboratory for Information Science and Technology (TNlist ), Department of Computer Science and Technology, Tsinghua University, Beijing 100084, China#TAB#"],"affiliations":[{"raw_affiliation_string":"Tsinghua National Laboratory for Information Science and Technology (TNList), Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua National Laboratory for Information Science and Technology (TNlist ), Department of Computer Science and Technology, Tsinghua University, Beijing 100084, China#TAB#","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053661520","display_name":"Lianhong Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Lianhong Cai","raw_affiliation_strings":["Tsinghua University, Beijing, Beijing, CN","Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems Graduate School at Shenzhen, Tsinghua University, 518055, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, Beijing, CN","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems Graduate School at Shenzhen, Tsinghua University, 518055, China","institution_ids":["https://openalex.org/I99065089","https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5031594049"],"corresponding_institution_ids":["https://openalex.org/I889458895","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.09090137,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"6","issue":null,"first_page":"64","last_page":"68"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7691296935081482},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.7194831371307373},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7147400379180908},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.6435421705245972},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.5969997644424438},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5639059543609619},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5633976459503174},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.556833803653717},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.47295257449150085},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.465154230594635},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4571603834629059},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4469241797924042},{"id":"https://openalex.org/keywords/linear-discriminant-analysis","display_name":"Linear discriminant analysis","score":0.4398390054702759},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3329751491546631},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.08106926083564758}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7691296935081482},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.7194831371307373},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7147400379180908},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.6435421705245972},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.5969997644424438},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5639059543609619},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5633976459503174},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.556833803653717},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.47295257449150085},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.465154230594635},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4571603834629059},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4469241797924042},{"id":"https://openalex.org/C69738355","wikidata":"https://www.wikidata.org/wiki/Q1228929","display_name":"Linear discriminant analysis","level":2,"score":0.4398390054702759},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3329751491546631},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.08106926083564758},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp.2012.6423489","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2012.6423489","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 8th International Symposium on Chinese Spoken Language Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.6499999761581421,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W60941773","https://openalex.org/W164706226","https://openalex.org/W1568980958","https://openalex.org/W1570923747","https://openalex.org/W1599623585","https://openalex.org/W1896470158","https://openalex.org/W1990368529","https://openalex.org/W1998871699","https://openalex.org/W2001619934","https://openalex.org/W2107860279","https://openalex.org/W2121712227","https://openalex.org/W2150658333","https://openalex.org/W6602482793","https://openalex.org/W6606687067","https://openalex.org/W6633911015"],"related_works":["https://openalex.org/W4391272374","https://openalex.org/W1914543332","https://openalex.org/W2946856121","https://openalex.org/W2108985546","https://openalex.org/W2081919107","https://openalex.org/W2433276473","https://openalex.org/W1537411440","https://openalex.org/W1984347656","https://openalex.org/W2535215250","https://openalex.org/W2024201202"],"abstract_inverted_index":{"In":[0,56],"concatenative":[1],"based":[2,88],"speech":[3,14,17,83,96,180],"synthesis,":[4],"the":[5,23,27,63,72,82,102,105,112,116,123,127,131,137,150,153,158,173,185],"purpose":[6],"of":[7],"unit":[8,54,73,175],"selection":[9,74,176],"is":[10,107,145],"to":[11,39,51,61,70,100,110,148,161],"select":[12],"proper":[13],"units":[15,25,84],"from":[16,115,157],"corpus":[18],"by":[19],"measuring":[20],"how":[21],"well":[22],"selected":[24],"match":[26],"given":[28],"features.":[29],"Perceptual":[30],"test":[31],"indicates":[32],"that":[33,172],"some":[34],"features":[35,46,67,120,133],"are":[36,126,134],"always":[37],"preferred":[38,128],"make":[40,162],"perceptual":[41,76,91,167],"distinction":[42],"between":[43,94],"units.":[44,97],"Such":[45],"should":[47],"be":[48],"judged":[49],"prior":[50],"others":[52],"in":[53,122,136],"selection.":[55],"this":[57],"work,":[58],"we":[59],"attempt":[60],"identify":[62,101],"priorities":[64],"for":[65,152],"different":[66,95],"and":[68,130,166],"try":[69],"optimize":[71],"with":[75,85,181],"clustering.":[77],"Out":[78],"approach":[79],"first":[80],"clusters":[81],"hierarchical":[86],"clustering":[87,117,159],"on":[89],"a":[90],"distance":[92],"measurement":[93],"A":[98],"method":[99],"questions":[103],"(concerning":[104],"features)":[106],"then":[108,146],"proposed":[109],"build":[111],"decision":[113,124],"tree":[114,125],"result.":[118],"The":[119],"used":[121,135],"ones,":[129],"other":[132],"target":[138,154],"cost":[139,155],"function.":[140],"Linear":[141],"discriminant":[142],"analysis":[143],"(LDA)":[144],"adopted":[147],"train":[149],"weights":[151,163],"function":[156],"result":[160],"more":[164],"reasonable":[165],"related..":[168],"Experimental":[169],"results":[170],"indicate":[171],"optimized":[174],"can":[177],"generate":[178],"synthetic":[179],"higher":[182],"naturalness":[183],"than":[184],"previous":[186],"approach.":[187]},"counts_by_year":[{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
