{"id":"https://openalex.org/W330074099","doi":"https://doi.org/10.21437/interspeech.2006-578","title":"Improving the performance of HMM-based voice conversion using context clustering decision tree and appropriate regression matrix format","display_name":"Improving the performance of HMM-based voice conversion using context clustering decision tree and appropriate regression matrix format","publication_year":2006,"publication_date":"2006-09-17","ids":{"openalex":"https://openalex.org/W330074099","doi":"https://doi.org/10.21437/interspeech.2006-578","mag":"330074099"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2006-578","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2006-578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2006","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064290493","display_name":"Long Qin","orcid":"https://orcid.org/0000-0001-5639-4983"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Qin","raw_affiliation_strings":["University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039492028","display_name":"Yi-Jian Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi-Jian Wu","raw_affiliation_strings":["University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059767940","display_name":"Zhen-Hua Ling","orcid":"https://orcid.org/0000-0001-7853-5273"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen-Hua Ling","raw_affiliation_strings":["University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102734836","display_name":"Ren-Hua Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ren-Hua Wang","raw_affiliation_strings":["University of Science and Technology of China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.7302,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.92651152,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"paper 1105","last_page":"Thu1BuP.1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9535999894142151,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.8050810098648071},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7931388020515442},{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.747338056564331},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7381606101989746},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.6219477653503418},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5443188548088074},{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.4702540636062622},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45862334966659546},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.42758113145828247},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.4124663770198822},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33844345808029175}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.8050810098648071},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7931388020515442},{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.747338056564331},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7381606101989746},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.6219477653503418},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5443188548088074},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.4702540636062622},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45862334966659546},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.42758113145828247},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.4124663770198822},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33844345808029175},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2006-578","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2006-578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2006","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4699999988079071},{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.44999998807907104}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W129217914","https://openalex.org/W133559434","https://openalex.org/W2049686551","https://openalex.org/W2146871184","https://openalex.org/W2158069733","https://openalex.org/W2160084280","https://openalex.org/W2166823384","https://openalex.org/W2276283915","https://openalex.org/W2376932199","https://openalex.org/W3036802551"],"related_works":["https://openalex.org/W2079655441","https://openalex.org/W2912293245","https://openalex.org/W4252942110","https://openalex.org/W1604114751","https://openalex.org/W2032941915","https://openalex.org/W2075706796","https://openalex.org/W4391272374","https://openalex.org/W4400309480","https://openalex.org/W2549308614","https://openalex.org/W2081919107"],"abstract_inverted_index":{"Abstract":[0],"To":[1],"improve":[2],"the":[3,6,13,19,32,36,52,55,65,72,76,86,95,99,104,128,131,136,141,157,175,195,210,213,218,242,261,277,290,297,318,327,343,349,355,376,379,383,387,394,400,417,425,436,441,468,480,502,505,512,517,523],"performance":[4,385],"of":[5,42,58,64,75,130,140,164,197,200,212,234,241,273,306,329,332,378,393,471,483,488,504,525],"HMM-based":[7,203,214,219,284,344,437,442,490],"voice":[8,176,184,188,204,239,331,345,388,391,491],"conversion":[9,177,205,346,492],"system":[10,160,206,287,325,384,493],"in":[11,46,230,288,342,354,434,449,496],"which":[12,179,289,373,429],"LSP":[14,56,291,469],"coefficient":[15],"is":[16,44,68,151,207,404,447,458,465,494],"introduced":[17,313],"as":[18,260,467],"spectral":[20,293,308],"representation,":[21],"a":[22,147,153,171,190,231,255,270,283,413,536],"model":[23,33,106,116,256,380,518],"clustering":[24,351,414,508],"technique":[25,178],"to":[26,51,71,161,185,193,251,409],"tie":[27],"HMMs":[28,88,235,367,423],"into":[29,370],"classes":[30],"for":[31,156,516],"adaptation,":[34,117],"considering":[35,416],"phonetic":[37,418],"and":[38,81,110,121,138,167,225,296,301,390,419,440,511,529],"linguistic":[39,420],"contextual":[40,360],"factors":[41,361],"HMMs,":[43,363],"adopted":[45],"this":[47,450,484],"paper.":[48,451],"Besides,":[49],"due":[50],"relationship":[53],"between":[54,362,422],"coefficients":[57,295,470],"adjacent":[59,474],"orders,":[60],"an":[61,453,486],"appropriate":[62,454,513],"format":[63,124,457],"regression":[66,118,122,455,514],"matrix":[67,119,123,456,515],"suggested":[69,459],"according":[70],"small":[73,271],"amount":[74,272],"adaptation":[77,257,401],"training":[78,402,463],"data.":[79],"Subjective":[80],"objective":[82,530],"tests":[83],"prove":[84],"that":[85],"source":[87],"can":[89,180,245],"be":[90,246],"adapted":[91,105],"more":[92],"accurately":[93],"using":[94,303,424],"proposed":[96,506],"method,":[97,415],"meanwhile":[98],"synthetic":[100,142,243,395],"speech":[101,111,133,143,163,199,215,220,244,274,285,396,438,443],"generated":[102],"from":[103,248],"has":[107,144,430],"better":[108],"discrimination":[109],"quality.":[112],"Index":[113],"Terms":[114],":":[115],"clustering,":[120],"1.":[125],"Introduction":[126],"With":[127],"development":[129],"corpus-based":[132,158],"synthesis":[134,221,286,324,444],"technique,":[135],"intelligibility":[137],"naturalness":[139],"been":[145,431],"improved":[146],"lot.":[148],"However,":[149,335],"it":[150],"still":[152,337],"difficult":[154],"problem":[155],"TTS":[159],"synthesize":[162],"various":[165,333],"speakers":[166],"speaking":[168],"styles":[169],"with":[170,269,326],"limited":[172],"database.":[173],"So":[174],"convert":[181],"one":[182,249,371],"speaker\u2019s":[183,187],"another":[186,252],"provides":[189,535],"positive":[191],"approach":[192],"achieve":[194],"goal":[196],"synthesizing":[198,330],"multi-speakers.":[201],"The":[202],"built":[208],"on":[209],"basis":[211],"synthesis.":[216],"In":[217,237,407,479],"system,":[222],"spectrum,":[223],"pitch":[224],"duration":[226],"are":[227,312,368],"modeled":[228],"simultaneously":[229],"unified":[232],"framework":[233],"[1][2][3].":[236],"addition,":[238],"characteristics":[240,389],"converted":[247],"speaker":[250],"by":[253,276,316],"applying":[254],"algorithm,":[258,320],"such":[259],"MLLR":[262,319,356],"(maximum":[263],"likelihood":[264],"linear":[265],"regression)":[266],"algorithm":[267,311,357],"[4][5],":[268],"uttered":[275],"target":[278],"speaker.":[279],"We":[280],"have":[281,476],"realized":[282],"(line":[292],"pair)":[294],"STRAIGHT":[298],"(Speech":[299],"Transformation":[300],"Representation":[302],"Adaptive":[304],"Interpolation":[305],"weighted":[307],"contour)":[309],"analysis-synthesis":[310],"[6][7].":[314],"Then,":[315],"realizing":[317],"we":[321],"provide":[322],"our":[323,489],"ability":[328],"speakers.":[334],"there":[336],"exist":[338],"two":[339],"main":[340],"problems":[341],"system.":[347],"Firstly,":[348],"data-driven":[350],"method":[352],"described":[353,448],"ignores":[358],"many":[359],"therefore":[364],"some":[365],"unrelated":[366],"forced":[369],"class":[372],"will":[374],"affect":[375],"accuracy":[377],"adaptation.":[381,519],"Secondly,":[382],"including":[386,527],"quality":[392],"decreases":[397],"greatly":[398],"when":[399,460],"data":[403,464],"very":[405,461],"limited.":[406],"order":[408],"solve":[410],"these":[411],"problems,":[412],"connections":[421],"context":[426,507],"decision":[427,509],"tree,":[428],"applied":[432],"similarly":[433],"both":[435],"recognition":[439],"areas":[445],"[8][9],":[446],"Moreover,":[452],"few":[462],"available,":[466],"only":[472],"several":[473],"orders":[475],"strong":[477],"correlations.":[478],"following":[481],"part":[482],"paper,":[485],"overview":[487],"presented":[495],"section":[497,533],"2.":[498],"Section":[499,520],"3":[500],"describes":[501],"details":[503],"tree":[510],"4":[521],"presents":[522],"results":[524],"experiments":[526],"subjective":[528],"evaluations":[531],"while":[532],"5":[534],"final":[537],"conclusion.":[538]},"counts_by_year":[{"year":2014,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
