{"id":"https://openalex.org/W172722986","doi":"https://doi.org/10.21437/icslp.2002-590","title":"Adaptive estimation of time-varying features from high-pitched speech based on an excitation source HMM","display_name":"Adaptive estimation of time-varying features from high-pitched speech based on an excitation source HMM","publication_year":2002,"publication_date":"2002-09-16","ids":{"openalex":"https://openalex.org/W172722986","doi":"https://doi.org/10.21437/icslp.2002-590","mag":"172722986"},"language":"en","primary_location":{"id":"doi:10.21437/icslp.2002-590","is_oa":false,"landing_page_url":"https://doi.org/10.21437/icslp.2002-590","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"7th International Conference on Spoken Language Processing (ICSLP 2002)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046057045","display_name":"Akira Sasou","orcid":"https://orcid.org/0000-0003-1700-0325"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Akira Sasou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103480519","display_name":"Kazuyo Tanaka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kazuyo Tanaka","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00992556,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2161","last_page":"2164"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10534","display_name":"Structural Health Monitoring Techniques","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/2205","display_name":"Civil and Structural Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.8363223671913147},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.754761815071106},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.714879035949707},{"id":"https://openalex.org/keywords/viterbi-algorithm","display_name":"Viterbi algorithm","score":0.5606189370155334},{"id":"https://openalex.org/keywords/linear-prediction","display_name":"Linear prediction","score":0.5443252921104431},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.4660870432853699},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.45256829261779785},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.44532105326652527},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4421069622039795},{"id":"https://openalex.org/keywords/viterbi-decoder","display_name":"Viterbi decoder","score":0.43806084990501404},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.41896364092826843},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.361259788274765},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3398160934448242},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2396625578403473},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.17622855305671692},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.12352657318115234}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.8363223671913147},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.754761815071106},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.714879035949707},{"id":"https://openalex.org/C60582962","wikidata":"https://www.wikidata.org/wiki/Q83886","display_name":"Viterbi algorithm","level":3,"score":0.5606189370155334},{"id":"https://openalex.org/C131109320","wikidata":"https://www.wikidata.org/wiki/Q581012","display_name":"Linear prediction","level":2,"score":0.5443252921104431},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.4660870432853699},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.45256829261779785},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.44532105326652527},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4421069622039795},{"id":"https://openalex.org/C117379686","wikidata":"https://www.wikidata.org/wiki/Q6996459","display_name":"Viterbi decoder","level":3,"score":0.43806084990501404},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.41896364092826843},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.361259788274765},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3398160934448242},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2396625578403473},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.17622855305671692},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.12352657318115234}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/icslp.2002-590","is_oa":false,"landing_page_url":"https://doi.org/10.21437/icslp.2002-590","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"7th International Conference on Spoken Language Processing (ICSLP 2002)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.49000000953674316,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W83163531","https://openalex.org/W140137236","https://openalex.org/W1584530349","https://openalex.org/W1856245758","https://openalex.org/W1966264494","https://openalex.org/W2017427968","https://openalex.org/W2097645910","https://openalex.org/W2098457679","https://openalex.org/W2119555423","https://openalex.org/W2154291215"],"related_works":["https://openalex.org/W2102309991","https://openalex.org/W1795315578","https://openalex.org/W2373954783","https://openalex.org/W2535886977","https://openalex.org/W2133857928","https://openalex.org/W2143297499","https://openalex.org/W2356694334","https://openalex.org/W2991144886","https://openalex.org/W2790444905","https://openalex.org/W1843778016"],"abstract_inverted_index":{"ABSTRACTThis":[0],"paper":[1],"describes":[2],"a":[3,22,28,64,169,181,216,259,278,355,361,381],"method":[4,20,93,112,166,191,306,322,353],"of":[5,27,53,84,133,173,219,236,271,298,327,339,360,376,388,406],"extracting":[6,95],"time-varyingfeatures":[7,297],"that":[8,25,68,291,307],"is":[9,60,113,130,186,192,221],"effective":[10],"for":[11,33,42,144,178,195,199,364,370],"speech":[12,117,170,198,286,356,414],"signals":[13],"with":[14,79],"high":[15],"funda-mental":[16],"frequencies.":[17],"The":[18,57,159,320,374],"proposed":[19,58,293,321,352,417],"adopts":[21,354],"speechproduction":[23],"model":[24,46,242,288,294,418],"consists":[26,359],"Time-Varying":[29,310],"Auto-Regressive":[30,175,311],"(TVAR)":[31,312],"process":[32,177,363],"an":[34,43,174,179,184,304,365,368,371,434],"articulatory":[35,366],"\ufb01lter":[36],"and":[37,76,100,103,207,314],"aHidden":[38],"Markov":[39],"Model":[40],"(HMM)":[41],"excitation":[44,55,86,238,372,399,410,435],"source.The":[45],"represents":[47,419],"waveform":[48,431],"amplitude":[49,432],"variations":[50],"bytime-varying":[51],"gain":[52,78,315],"the":[54,81,85,91,134,141,200,204,223,237,241,249,253,268,285,292,318,331,398,404,420,425,430,439],"source.":[56,373],"algo-rithm":[59],"given":[61],"by":[62,329],"extending":[63],"Viterbi":[65],"algorithm":[66,70],"so":[67,215,252,290],"theproposed":[69],"can":[71,295,323],"adaptively":[72,308],"estimate":[73,137,403],"TVAR":[74,362],"coef\ufb01-cients":[75],"time-varying":[77,96,344,421,442],"decoding":[80],"state":[82,383],"tran-sition":[83],"source":[87,239,400,411,436],"HMM.":[88],"We":[89,301,391],"applied":[90],"pro-posed":[92],"to":[94,115,124,151,228,246,266,386],"features":[97,273,345,422],"from":[98,412,438],"bothsynthetic":[99],"natural":[101],"speech,":[102],"con\ufb01rmed":[104],"its":[105],"feasibility.1.":[106],"INTRODUCTIONThe":[107],"conventional":[108],"Linear":[109],"Prediction":[110],"(LP)":[111],"widelyused":[114],"analyze":[116],"signals[1].":[118],"However,":[119,189],"several":[120],"prob-lems":[121],"still":[122],"remain":[123],"be":[125,229,247],"solved[2].":[126],"One":[127],"such":[128,258],"problem":[129],"thatlocal":[131],"peaks":[132],"LP":[135,395],"spectral":[136],"are":[138,209,244,378],"strongly":[139],"biasedtoward":[140],"harmonics,":[142],"especially":[143],"high-pitched":[145,413],"speech.Several":[146],"methods":[147],"have":[148,161,392],"been":[149],"designed":[150],"overcome":[152],"this":[153,190,281],"prob-lem":[154],"[3,":[155],"4,":[156],"5,":[157],"6].":[158],"authors":[160],"previously":[162,393],"indicated":[163],"thatan":[164],"analysis":[165,224,250,261,396],"based":[167,316],"on":[168,317],"production":[171,287,357],"modelconsisting":[172],"(AR)":[176],"artic-ulatory\ufb01lterand":[180],"HiddenMarkov":[182],"Model(HMM)for":[183],"ex-citationsource":[185],"robustforhighfundamental":[187],"frequencies[7,8].":[188],"not":[193],"suitable":[194],"analyzing":[196],"con-tinuous":[197],"following":[201],"reasons.":[202],"First,":[203],"AR":[205],"co-ef\ufb01cients":[206],"HMM":[208,333,369,440],"iteratively":[210],"estimated":[211],"within":[212,257],"everyanalysis":[213],"frame,":[214,251],"large":[217,230],"number":[218],"operations":[220,328],"needed.Second,":[222],"frame":[225],"size":[226],"needs":[227],"in":[231,277,380,384],"orderto":[232],"guarantee":[233],"stable":[234],"learning":[235],"HMM.Third,":[240],"parameters":[243,255],"assumed":[245],"constantwithin":[248],"resulting":[254],"areaveraged":[256],"long":[260],"frame.":[262],"This":[263],"makesit":[264],"dif\ufb01cult":[265],"extract":[267,336],"dynamic":[269,337],"characteristics":[270,338,405],"speechwhen":[272],"change":[274],"rapidly,":[275],"like":[276],"singing":[279],"voice.In":[280],"paper,":[282],"we":[283],"extend":[284],"in[7]":[289],"represent":[296],"continuous":[299,340],"speech.":[300],"also":[302,335,429],"describe":[303],"anal-ysis":[305],"estimates":[309],"coef\ufb01cients":[313],"newmodel.":[319],"substantially":[324],"reduce":[325],"thenumber":[326],"applying":[330],"learned":[332],"andcan":[334],"speechby":[341],"estimating":[342],"those":[343],"adaptively.2.":[346],"SPEECH":[347],"PRODUCTION":[348],"MODEL":[349],"BASED":[350],"ONTVAR-HMMThe":[351],"modelthat":[358],"\ufb01lterand":[367],"nodes":[375],"theHMM":[377],"concatenated":[379],"ring":[382],"order":[385],"representperiodicity":[387],"voiced":[389],"sounds.":[390],"shownthat":[394],"incorporating":[397],"HMMcan":[401],"precisely":[402],"both":[407],"vocal":[408,426],"tractand":[409],"signal[7,":[415],"8].The":[416],"ofnot":[423],"only":[424],"tract":[427],"but":[428],"bymultiplying":[433],"emitted":[437],"bya":[441],"gain.":[443]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
