{"id":"https://openalex.org/W102093577","doi":"https://doi.org/10.21437/eurospeech.1997-52","title":"Acoustic modeling based on the MDL principle for speech recognition","display_name":"Acoustic modeling based on the MDL principle for speech recognition","publication_year":1997,"publication_date":"1997-09-22","ids":{"openalex":"https://openalex.org/W102093577","doi":"https://doi.org/10.21437/eurospeech.1997-52","mag":"102093577"},"language":"en","primary_location":{"id":"doi:10.21437/eurospeech.1997-52","is_oa":false,"landing_page_url":"https://doi.org/10.21437/eurospeech.1997-52","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"5th European Conference on Speech Communication and Technology (Eurospeech 1997)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081629487","display_name":"Koichi Shinoda","orcid":"https://orcid.org/0000-0003-1095-3203"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Koichi Shinoda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5030676365","display_name":"Takao Watanabe","orcid":"https://orcid.org/0000-0002-3175-3609"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Takao Watanabe","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5081629487"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6038,"has_fulltext":false,"cited_by_count":95,"citation_normalized_percentile":{"value":0.73020753,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"99","last_page":"102"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9842000007629395,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.791252076625824},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7028623819351196},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6219237446784973},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6123343110084534},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5978809595108032},{"id":"https://openalex.org/keywords/phone","display_name":"Phone","score":0.4836617410182953},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.43667036294937134},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4243614375591278},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38349947333335876}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.791252076625824},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7028623819351196},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6219237446784973},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6123343110084534},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5978809595108032},{"id":"https://openalex.org/C2778707766","wikidata":"https://www.wikidata.org/wiki/Q202064","display_name":"Phone","level":2,"score":0.4836617410182953},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.43667036294937134},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4243614375591278},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38349947333335876},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/eurospeech.1997-52","is_oa":false,"landing_page_url":"https://doi.org/10.21437/eurospeech.1997-52","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"5th European Conference on Speech Communication and Technology (Eurospeech 1997)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7699999809265137,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":1,"referenced_works":["https://openalex.org/W1872806465"],"related_works":["https://openalex.org/W2136763963","https://openalex.org/W2109705048","https://openalex.org/W2940588515","https://openalex.org/W2153098279","https://openalex.org/W1909151225","https://openalex.org/W1987783679","https://openalex.org/W2160030256","https://openalex.org/W4253235840","https://openalex.org/W3151937861","https://openalex.org/W2061937230"],"abstract_inverted_index":{"ACOUSTIC":[0],"MODELING":[1],"BASED":[2],"ON":[3],"THE":[4],"MDLPRINCIPLE":[5],"FOR":[6],"SPEECH":[7],"RECOGNITIONKoichi":[8],"Shinoda":[9],"and":[10,120,399,440,595,604,714],"Takao":[11],"WatanabeNEC":[12],"Corp":[13],"oration4-1-1":[14],"Miyazaki,":[15],"Miyamae-ku,":[16],"Kawasaki":[17],"216,":[18],"JAPANfshino":[19],"da,watanab":[20],"eg@hum.cl":[21],".nec.co.jpABSTRACTRecently":[22],"context-dep":[23],"endent":[24],"phone":[25,201,208,285],"units,":[26,471],"such":[27,50,195,258,272],"as":[28,68,618,856,928],"tri-phones,":[29],"have":[30,168,366,715],"b":[31,98,169,234,250,367,428,527,635,685,754,784,823,826],"een":[32,170,368,755,824],"used":[33,126,741,756,927],"to":[34,63,69,77,127,249,253,270,355,357,427,520,535,570,639,654,684,783,800,809],"mo":[35,141,153,202,209,219,228,232,265,348,353,495,510,644,762,834,840,846,859,866,872,885,900,925,932],"del":[36,633,835,847,860],"subword":[37,58,393],"units":[38,381,426,445,492,507,538,782],"in":[39,109,121,193,237,295,300,337,449,461,500,533,591,670,730,830,915],"sp":[40,174,178,238,338],"eechrecognition":[41],"based":[42,549,747],"on":[43,173,550,667,676,748],"Hidden":[44,184],"Markov":[45,185],"Mo":[46,186],"dels":[47,142,154,187,203,220,233,349,496],"(HMMs).While":[48],"most":[49,194,301,483,544],"metho":[51,107,146,364,419,431,459,487,514,559,572,660],"ds":[52,365,432,661],"employ":[53,350],"clustering":[54,95,112,160,351,363,383,513],"of":[55,80,94,116,131,155,205,217,226,257,263,276,326,352,360,408,438,443,484,490,493,504,508,542,580,622,629,642,658,680,691,804,865,871,879,896,899],"theHMM":[56],"parameters(e.g.,":[57],"clustering,":[59,394],"state":[60,111,413,810],"cluster-ing,":[61],"etc.),":[62],"control":[64,655,801],"HMM":[65],"size":[66],"so":[67,309],"avoid":[70],"p":[71,100,341],"o":[72,583,673,906],"or":[73,675,698],"recogni-tion":[74,344],"accuracy":[75,150],"due":[76],"an":[78,749,764,818,883],"insu\u000eciency":[79,332],"training":[81,245,288,319,531],"data,":[82,532,713],"noneof":[83],"them":[84],"provide":[85],"any":[86],"e":[87,99,251,429,540,686,775,785,827,828],"ective":[88,776,829],"criterion":[89,124,773,820,844],"for":[90,305,371,379,390,742,757,767,779,789,853,909,922],"the":[91,129,140,152,162,197,214,242,279,296,303,361,380,404,409,422,435,441,444,470,485,491,506,523,529,537,543,547,551,555,564,581,592,596,600,614,619,626,630,640,656,668,671,677,737,759,768,781,802,832,849,857,876,880,890,903,910,918,923],"optimal":[92,833,858],"de-gree":[93],"that":[96,139,192,225,311,598],"should":[97],"erformed.":[101],"This":[102,330,771,934],"pap":[103,722],"erprop":[104],"oses":[105],"a":[106,259,273,283,323,501,665,689,700,727,863,869,929],"d":[108,147,584,674,907],"which":[110,122,282,472,821],"is":[113,125,189,221,247,268,328,384,497,517,562,567,587,602,740,746,774,817,917],"accom-plished":[114],"byway":[115],"phonetic":[117,813],"decision":[118,814],"trees":[119],"theMDL":[123],"optimize":[128],"degree":[130,657,803],"cluster-ing.":[132],"Large-vo":[133],"cabulary":[134],"Japanese":[135],"recognition":[136,180,340],"exp":[137,693,709],"erimentsshow":[138],"obtained":[143],"by":[144,476,585,699],"this":[145,577,606,649,721],"achievedthe":[148],"highest":[149],"among":[151,862],"various":[156],"sizesobtained":[157],"with":[158,281,411,848,882],"conventional":[159],"approaches.1.INTRODUCTIONOver":[161],"past":[163],"few":[164],"years,":[165],"extensive":[166],"studies":[167],"car-ried":[171],"out":[172,448,466,499],"eaker-indep":[175],"end":[176],"ent":[177],"eech":[179,239,339],"us-ing":[181],"continuous":[182],"density":[183],"(HMMs).It":[188],"well":[190],"known":[191],"systems,":[196],"use":[198,256,392,397,433],"ofcontext-dep":[199],"endent(CD)":[200],"instead":[204,503],"context-indep":[206],"endent(CI)":[207],"dels(monophon":[210],"es),":[211],"improvesrecognition":[212],"accuracy[1-7].Since":[213],"numb":[215,261,620,678,894],"er":[216,262,621,679,723,895],"CD":[218,231,264,284,313,347,509,643],"usually":[222,290],"much":[223],"largerthan":[224],"CI":[227,494],"dels,":[229],"using":[230,346,664,695,812],"etter":[235],"capturesvariations":[236],"data.":[240,277],"However,":[241],"amountof":[243],"aail-able":[244],"data":[246,289,320,327,331,697,855],"likely":[248],"insu\u000ecient":[252],"supp":[254],"ortthe":[255],"large":[260,274,324],"dels.":[266,841],"It":[267],"oftenimpractical":[269],"prepare":[271],"amount":[275,325],"Fur-thermore,":[278],"frequency":[280],"app":[286,317],"earsin":[287],"di":[291],"ers":[292],"substantiall":[293],"y":[294],"set":[297,634,641,864,870],"ofCD":[298],"phones;":[299],"case,":[302],"frequencies":[304],"some":[306],"CDphones":[307],"are":[308,376,417,446,473,707],"small":[310],"those":[312],"phones":[314],"do":[315],"not":[316,777],"earin":[318],"even":[321],"if":[322],"pro-vided.":[329],"often":[333],"causes":[334],"serious":[335],"degra-dation":[336],"erformance.":[342],"Most":[343],"systems":[345],"delparameters":[354],"try":[356],"alleviate":[358],"part":[359],"problem.Various":[362],"develop":[369],"ed":[370],"thispurp":[372],"ose.":[373],"First,":[374],"there":[375,416],"several":[377,418],"choices":[378],"towhich":[382],"carried":[385,447,498],"out;":[386],"K.F.":[387],"Leeet":[388],"al.[1],":[389],"ex-ample,":[391],"Hwanget":[395],"al.[2]":[396],"stateclustering,":[398],"Digalakis":[400],"et":[401],"al.[3]":[402],"cluster":[403],"mixture":[405],"com-p":[406],"onents":[407],"HMMs":[410],"Gaussian-mixture":[412],"ob-servation":[414],"densities.Second,":[415],"dsto":[420],"select":[421,536],"acoustically-si":[423],"mi":[424],"lar":[425],"clustered.Some":[430],"only":[434,778],"acoustic":[436,467,524],"characteristics":[437],"thedata":[439],"merging":[442,505],"ab":[450,465],"ottom-up":[451,574],"manner[4":[452],",":[453],"2,":[454],"3":[455],"].":[456],"The":[457,842,912],"other":[458,796],"ds,":[460,488,515],"addi-tion,":[462],"utilizea":[463],"prioriknowledge":[464],"similariti":[468,525],"esbetween":[469],"mostly":[474],"represented":[475],"deci-sion":[477],"trees[1,":[478],"5,":[479],"6,":[480],"7].":[481],"In":[482,576,612],"latter":[486],"split-ting":[489],"top-downmanner,":[502],"dels.In":[511],"these":[512],"it":[516],"imp":[518],"ortant":[519],"prop-erly":[521],"measure":[522],"es":[526],"etween":[528],"unitsutilizing":[530],"order":[534],"tob":[539],"clustered.One":[541],"successful":[545],"approachis":[546],"approach":[548,608,650,729],"maximum-likel":[552],"ihood(ML)criterion(e.g.,[7":[553],"]).In":[554],"following,for":[556],"simplicity,the":[557],"splitting":[558,586,663,811],"d(top-down":[560],"clustering)":[561],"explained,though":[563],"similar":[565],"explanation":[566],"also":[568,788],"applicable":[569],"themerging":[571],"d(b":[573],"clustering).":[575],"approach,the":[578],"increase":[579],"likeliho":[582,672,905],"calculated":[588],"foreach":[589],"unit":[590,593,597],"set,":[594],"has":[599,609,753,822],"largestincrease":[601],"selected":[603],"split.However,":[605],"ML":[607,738],"one":[610],"drawback.":[611],"mostcase,":[613],"likelihood":[615],"becomes":[616],"larger":[617],"unitsb":[623],"ecomes":[624,636],"larger.In":[625],"nal":[627],"stage":[628],"splitting,":[631],"themo":[632],"almost":[637],"identical":[638],"d-els":[645],"without":[646],"clustering.":[647,805],"Therefore,":[648,794],"requiresan":[651],"external":[652,797],"parameter":[653,798],"clustering.Most":[659],"limit":[662],"threshold":[666],"in-crease":[669],"units.":[681],"Thesethresholds":[682],"needs":[683],"optimized":[687],"through":[688],"series":[690],"recog-nition":[692],"eriments":[694],"test":[696],"cross-validationmetho":[701],"d.":[702],"These":[703],"optimization":[704],"pro":[705],"cesses":[706],"computation-ally":[708],"ensive,":[710],"need":[711],"more":[712],"no":[716,795],"strong":[717],"theo-retical":[718],"justi":[719],"cation.In":[720],"we":[724],"prop":[725],"ose":[726],"new":[728],"whicha":[731],"minimum":[732,850],"description":[733,851],"length(MDL)":[734],"criterion,":[735,739],"insteadof":[736],"clustering.The":[743],"MDLapproach[9":[744],"]":[745],"information":[750,819],"theoretic":[751],"criterion,which":[752],"selecting":[758,831],"probabilisti":[760],"c":[761],"delwith":[763],"appropriate":[765],"complexity":[766],"given":[769,887],"amountofdata.":[770],"MDL":[772,843],"select-ing":[780],"split,":[786],"but":[787],"deciding":[790],"whether":[791],"tostop":[792],"splitting.":[793],"isneeded":[799],"We":[806],"apply":[807],"thiscriterion":[808],"tree.2.MDL":[815],"CRITERIONMDL[9]":[816],"provento":[825],"from":[836,861],"amongvarious":[837],"probabilis":[838],"tic":[839],"selectsthe":[845],"length":[852,921],"thegiven":[854],"d-els.":[867],"When":[868],"delsf1;":[873],":::;i;:::;Igis":[874],"given,":[875],"de-scription":[877],"length,li(xN),":[878],"data,f=1;:::;xNg,together":[881],"underlying":[884],"deliis":[886,926],"by,l(i)=logP^\u0012(i)xN)+i2N+":[888],"logI(1)whereiis":[889],"dimensionali":[891],"ty":[892],"(the":[893],"free":[897],"param-eters)":[898],"deli,":[901],"and^\u0012(i)is":[902],"maximum":[904],"es-timates":[908],"parameters\u0012(i)=(1;:::;\u0012i)ofmodeli.":[911],"rst":[913],"term":[914,935],"(1)":[916],"co":[919],"de":[920],"dataxNwhen":[924],"probabili":[930],"stic":[931],"del.":[933]},"counts_by_year":[{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":9},{"year":2014,"cited_by_count":9},{"year":2013,"cited_by_count":8},{"year":2012,"cited_by_count":10}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
