{"id":"https://openalex.org/W2144099410","doi":"https://doi.org/10.1109/icassp.2005.1415145","title":"Noisy Speech Recognition Based on Robust End-point Detection and Model Adaptation","display_name":"Noisy Speech Recognition Based on Robust End-point Detection and Model Adaptation","publication_year":2006,"publication_date":"2006-10-11","ids":{"openalex":"https://openalex.org/W2144099410","doi":"https://doi.org/10.1109/icassp.2005.1415145","mag":"2144099410"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2005.1415145","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2005.1415145","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. (ICASSP '05). IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101957143","display_name":"Zhipeng Zhang","orcid":"https://orcid.org/0000-0002-3733-697X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhipeng Zhang","raw_affiliation_strings":["Multimedia Laboratories, NTT DoCoMo, Inc., Yokosuka, Kanagawa, Japan"],"affiliations":[{"raw_affiliation_string":"Multimedia Laboratories, NTT DoCoMo, Inc., Yokosuka, Kanagawa, Japan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009532108","display_name":"Sadaoki Furui","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"S. Furui","raw_affiliation_strings":["Department of Computer Science, Tokyo Institute of Technology, Meguro, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Tokyo Institute of Technology, Meguro, Tokyo, Japan","institution_ids":["https://openalex.org/I114531698"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101957143"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.13287002,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"1","issue":null,"first_page":"441","last_page":"444"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.8039896488189697},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7935014367103577},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7891709804534912},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.564625084400177},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5500746965408325},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5117864012718201},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5039300322532654},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.48405972123146057},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.47252729535102844},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4665493667125702},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.4218841791152954},{"id":"https://openalex.org/keywords/speech-segmentation","display_name":"Speech segmentation","score":0.4196326434612274},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4131760597229004},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09634780883789062},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.06740164756774902}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.8039896488189697},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7935014367103577},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7891709804534912},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.564625084400177},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5500746965408325},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5117864012718201},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5039300322532654},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.48405972123146057},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.47252729535102844},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4665493667125702},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.4218841791152954},{"id":"https://openalex.org/C207030507","wikidata":"https://www.wikidata.org/wiki/Q2266173","display_name":"Speech segmentation","level":3,"score":0.4196326434612274},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4131760597229004},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09634780883789062},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.06740164756774902},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2005.1415145","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2005.1415145","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. (ICASSP '05). IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005.","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.4399999976158142,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W125650242","https://openalex.org/W1986103221","https://openalex.org/W2009422037","https://openalex.org/W2032474878","https://openalex.org/W2041013097","https://openalex.org/W2146871184","https://openalex.org/W2153513468"],"related_works":["https://openalex.org/W1974738623","https://openalex.org/W2105626703","https://openalex.org/W2514679778","https://openalex.org/W3026276030","https://openalex.org/W2026858810","https://openalex.org/W2144673858","https://openalex.org/W2074307126","https://openalex.org/W642007152","https://openalex.org/W2168417340","https://openalex.org/W2738297198"],"abstract_inverted_index":{"How":[0],"to":[1,10,69],"detect":[2],"speech":[3,7,29,44,53,63,126,145],"periods":[4],"in":[5,142],"noisy":[6,28,43,125,144],"and":[8,37,88],"how":[9],"cope":[11],"with":[12,64],"the":[13,62,65,70,77,85,92,105,110,113,137],"temporal":[14],"variation":[15],"of":[16,97,112,115],"noise":[17,148],"characteristics":[18],"are":[19,101],"challenging":[20],"problems.":[21],"This":[22],"paper":[23],"proposes":[24],"a":[25,129],"new":[26],"robust":[27,34],"recognition":[30,78,86],"method":[31,121,139],"based":[32,75,83],"on":[33,76,84],"end-point":[35,73],"detection":[36,74],"online":[38],"model":[39],"adaptation":[40,82],"using":[41,91],"tree-structured":[42],"HMMs.":[45],"The":[46,95,119],"basic":[47],"algorithm":[48],"consists":[49],"of;":[50],"1)":[51,98],"blind":[52,106],"segmentation;":[54],"2)":[55],"best":[56],"matching":[57],"GMM":[58],"selection;":[59],"3)":[60],"recognizing":[61,143],"HMM":[66,81],"that":[67,136],"corresponds":[68],"GMM;":[71],"4)":[72],"results;":[79,87],"5)":[80],"6)":[89,100],"re-recognition":[90],"adapted":[93],"HMM.":[94],"processes":[96],"through":[99],"repeated":[102],"by":[103,124,128],"shifting":[104],"segmentation":[107],"window":[108],"until":[109],"end":[111],"sequence":[114],"utterances":[116],"is":[117,122,140],"detected.":[118],"proposed":[120,138],"evaluated":[123],"collected":[127],"Japanese":[130],"dialogue":[131],"system.":[132],"Experimental":[133],"results":[134],"show":[135],"effective":[141],"under":[146],"various":[147],"conditions.":[149]},"counts_by_year":[{"year":2017,"cited_by_count":1},{"year":2014,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
