{"id":"https://openalex.org/W131503466","doi":"https://doi.org/10.21437/interspeech.2005-799","title":"Comparing spectral distance measures for join cost optimization in concatenative speech synthesis","display_name":"Comparing spectral distance measures for join cost optimization in concatenative speech synthesis","publication_year":2005,"publication_date":"2005-09-04","ids":{"openalex":"https://openalex.org/W131503466","doi":"https://doi.org/10.21437/interspeech.2005-799","mag":"131503466"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2005-799","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2005-799","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2005","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076709962","display_name":"Ingmund Bj\u00f8rkan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ingmund Bj\u00f8rkan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055323428","display_name":"Torbj\u00f8rn Svendsen","orcid":"https://orcid.org/0000-0003-0578-7941"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Torbj\u00f8rn Svendsen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5041655792","display_name":"Snorre Farner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Snorre Farner","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5076709962"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4409,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.70754516,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2577","last_page":"2580"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/concatenation","display_name":"Concatenation (mathematics)","score":0.9312576055526733},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7172384262084961},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5715582966804504},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43844282627105713},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.36861008405685425},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3635213375091553},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.2292393445968628},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19764432311058044}],"concepts":[{"id":"https://openalex.org/C87619178","wikidata":"https://www.wikidata.org/wiki/Q126002","display_name":"Concatenation (mathematics)","level":2,"score":0.9312576055526733},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7172384262084961},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5715582966804504},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43844282627105713},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.36861008405685425},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3635213375091553},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.2292393445968628},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19764432311058044}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2005-799","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2005-799","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2005","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.44999998807907104}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W88864901","https://openalex.org/W106974151","https://openalex.org/W1568980958","https://openalex.org/W1887401733","https://openalex.org/W1989337816","https://openalex.org/W2068804781","https://openalex.org/W2097900379","https://openalex.org/W2150658333","https://openalex.org/W2407840102","https://openalex.org/W2799061466"],"related_works":["https://openalex.org/W234770729","https://openalex.org/W2588431733","https://openalex.org/W2391796527","https://openalex.org/W2388523225","https://openalex.org/W2387179309","https://openalex.org/W2388979876","https://openalex.org/W4245668640","https://openalex.org/W2032960563","https://openalex.org/W2142993035","https://openalex.org/W2899044948"],"abstract_inverted_index":{"Abstract":[0],"In":[1,62],"concatenative":[2],"synthesis":[3,100,210],"the":[4,16,20,31,52,85,112,142,145,177,184,188,195,199,235,286,290,298],"join":[5,66],"cost":[6,21,126,129,147,167,179,197,200,237,252,292],"function":[7,284,295,306],"can":[8,156],"be":[9,157,170,279],"relatedtotheprobabilityofaperceiveddiscontinuityatthejoin.":[10],"There-fore":[11],"it":[12],"is":[13,47,68,101,116,148,198,212],"important":[14],"that":[15],"distance":[17,257,287],"measures":[18,132,258],"in":[19,39,87,215],"func-tion":[22],"correlate":[23],"highly":[24,95],"with":[25,91,162,190,207,226,242,256],"human":[26,53,243],"perceived":[27,54,273,301],"discontinuities.":[28],"Inthis":[29],"paper":[30],"results":[32],"of":[33,72,84,93,122,141,245,272,285,300],"a":[34,64,120,135,149,173,181,239,276,282,294],"listening":[35],"test":[36],"on":[37,103,119,259],"joins":[38],"two":[40,153,166,204],"Norwe-gian":[41],"long":[42],"vowels:":[43],"/A:/":[44],"and":[45,127,139,194,269],"/e:/,":[46],"presented.":[48],"Five":[49],"spectral":[50,265],"dis-tancemeasuresandtheF0differencearecomparedaspredictorsof":[51],"discontinuities":[55],"using":[56],"Receiver":[57],"Operat-ing":[58],"Characteristic":[59],"(ROC)":[60],"curves.":[61],"addition,":[63],"linear":[65,74],"costfunction":[67],"optimized":[69],"by":[70,172],"means":[71],"stepwise":[73],"regression.":[75],"1.":[76],"Introduction":[77],"Unit":[78,98],"selection":[79,99,209],"systems":[80],"are":[81,187],"considered":[82],"state":[83],"art":[86],"text":[88],"tospeech":[89],"synthesis,":[90],"capability":[92],"producing":[94],"natural-sounding":[96],"speech.":[97],"based":[102,118],"concate-natingsmallunitsofspeech,selectedfromalargedatabasecon-taining":[104],"multiple":[105],"candidatesfor":[106],"each":[107],"unit.":[108],"The":[109,250],"search":[110,175],"for":[111,176],"op-timal":[113],"unit":[114,136,160,208],"sequence":[115,161],"normally":[117],"combination":[121],"twocost":[123],"functions:":[124],"target":[125,192],"concatenation":[128,146,196,236,248,251,277,291],"[1].":[130],"Targetcost":[131],"how":[133,151],"well":[134,152],"matches":[137],"prosodic":[138],"phoneticfeatures":[140],"target,":[143],"while":[144],"measureof":[150],"neighboring":[154],"units":[155,186],"concatenated.":[158],"Theoptimal":[159],"respect":[163],"to":[164,254,297],"these":[165],"functionscan":[168],"then":[169,303],"found":[171],"Viterbi":[174],"lowest":[178],"paththrough":[180],"lattice,":[182],"where":[183],"database":[185],"nodes":[189],"anassociate":[191],"cost,":[193],"ofthe":[201],"path":[202],"between":[203],"nodes.One":[205],"problem":[206],"system":[211],"thelarge":[213],"variability":[214],"quality,":[216],"varying":[217],"from":[218],"almost":[219],"perfect":[220],"speechto":[221],"very":[222],"poor":[223],"quality":[224,232],"speech":[225],"many":[227],"disturbing":[228],"discontinu-ities.":[229],"To":[230],"improve":[231],"we":[233],"want":[234],"tohave":[238],"high":[240],"correlation":[241],"perception":[244],"discontinu-ities":[246],"at":[247,275],"points.":[249],"has":[253],"bemeasured":[255],"physical":[260],"measurable":[261],"prop-erties":[262],"such":[263],"as":[264,281,293],"parameters,":[266],"F0":[267],"(pitch)":[268],"power.":[270],"Theprobability":[271],"discontinuity":[274,302],"pointcan":[278],"estimated":[280],"composite":[283],"mea-sures.":[288],"De\ufb01ning":[289],"propor-tional":[296],"probability":[299],"gives":[304],"acost":[305]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
