{"id":"https://openalex.org/W2013598660","doi":"https://doi.org/10.1109/slt.2012.6424210","title":"Improving wideband speech recognition using mixed-bandwidth training data in CD-DNN-HMM","display_name":"Improving wideband speech recognition using mixed-bandwidth training data in CD-DNN-HMM","publication_year":2012,"publication_date":"2012-12-01","ids":{"openalex":"https://openalex.org/W2013598660","doi":"https://doi.org/10.1109/slt.2012.6424210","mag":"2013598660"},"language":"en","primary_location":{"id":"doi:10.1109/slt.2012.6424210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2012.6424210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA","Microsoft Corp., One Microsoft Way, Redmond, WA 98052"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Corp., One Microsoft Way, Redmond, WA 98052","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA","Microsoft Corp., One Microsoft Way, Redmond, WA 98052"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Corp., One Microsoft Way, Redmond, WA 98052","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045413298","display_name":"Jui-Ting Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jui-Ting Huang","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA","Microsoft Corp., One Microsoft Way, Redmond, WA 98052"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Corp., One Microsoft Way, Redmond, WA 98052","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101928537","display_name":"Yifan Gong","orcid":"https://orcid.org/0000-0002-3912-097X"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yifan Gong","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA","Microsoft Corp., One Microsoft Way, Redmond, WA 98052"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Corp., One Microsoft Way, Redmond, WA 98052","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100365053"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":18.5688,"has_fulltext":false,"cited_by_count":113,"citation_normalized_percentile":{"value":0.99338052,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"131","last_page":"136"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.8378596305847168},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.779757022857666},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7769006490707397},{"id":"https://openalex.org/keywords/wideband-audio","display_name":"Wideband audio","score":0.7455217838287354},{"id":"https://openalex.org/keywords/narrowband","display_name":"Narrowband","score":0.6865408420562744},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5590483546257019},{"id":"https://openalex.org/keywords/bandwidth-extension","display_name":"Bandwidth extension","score":0.520618200302124},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4948193430900574},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46364346146583557},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4517121911048889},{"id":"https://openalex.org/keywords/wideband","display_name":"Wideband","score":0.442121297121048},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.41856253147125244},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.32061707973480225},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.2304457128047943},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07788664102554321},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07160475850105286},{"id":"https://openalex.org/keywords/electronic-engineering","display_name":"Electronic engineering","score":0.06257352232933044}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.8378596305847168},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.779757022857666},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7769006490707397},{"id":"https://openalex.org/C173391809","wikidata":"https://www.wikidata.org/wiki/Q2740189","display_name":"Wideband audio","level":5,"score":0.7455217838287354},{"id":"https://openalex.org/C2776096036","wikidata":"https://www.wikidata.org/wiki/Q1140483","display_name":"Narrowband","level":2,"score":0.6865408420562744},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5590483546257019},{"id":"https://openalex.org/C9387945","wikidata":"https://www.wikidata.org/wiki/Q4854770","display_name":"Bandwidth extension","level":4,"score":0.520618200302124},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4948193430900574},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46364346146583557},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4517121911048889},{"id":"https://openalex.org/C2780202535","wikidata":"https://www.wikidata.org/wiki/Q4524457","display_name":"Wideband","level":2,"score":0.442121297121048},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.41856253147125244},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.32061707973480225},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.2304457128047943},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07788664102554321},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07160475850105286},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.06257352232933044},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.0},{"id":"https://openalex.org/C87687168","wikidata":"https://www.wikidata.org/wiki/Q173114","display_name":"Digital audio","level":4,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt.2012.6424210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2012.6424210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4099999964237213,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W88864901","https://openalex.org/W217970951","https://openalex.org/W587794757","https://openalex.org/W1923007241","https://openalex.org/W2076794394","https://openalex.org/W2108817613","https://openalex.org/W2114016253","https://openalex.org/W2125234026","https://openalex.org/W2125610823","https://openalex.org/W2132462584","https://openalex.org/W2133856945","https://openalex.org/W2135567392","https://openalex.org/W2141778357","https://openalex.org/W2144792281","https://openalex.org/W2147152002","https://openalex.org/W2147768505","https://openalex.org/W2158843472","https://openalex.org/W2169434751","https://openalex.org/W2172097686","https://openalex.org/W2394932179","https://openalex.org/W2465562739","https://openalex.org/W6681419218","https://openalex.org/W6685119187","https://openalex.org/W6719250660"],"related_works":["https://openalex.org/W2101490115","https://openalex.org/W3135807828","https://openalex.org/W1968385191","https://openalex.org/W2790274877","https://openalex.org/W1932653299","https://openalex.org/W2112665203","https://openalex.org/W2141852988","https://openalex.org/W2168435653","https://openalex.org/W2604437490","https://openalex.org/W4301058129"],"abstract_inverted_index":{"Context-dependent":[0],"deep":[1],"neural":[2],"network":[3],"hidden":[4],"Markov":[5],"model":[6,13,19],"(CD-DNN-HMM)":[7],"is":[8,104,121],"a":[9,89],"recently":[10],"proposed":[11,132],"acoustic":[12],"that":[14,54,130],"significantly":[15],"outperformed":[16],"Gaussian":[17],"mixture":[18],"(GMM)-HMM":[20],"systems":[21],"in":[22,48,93],"many":[23],"large":[24],"vocabulary":[25],"speech":[26,45,103,143],"recognition":[27,46,75,138],"(LVSR)":[28],"tasks.":[29],"In":[30],"this":[31],"paper":[32],"we":[33,70],"present":[34],"our":[35],"strategy":[36],"of":[37,59],"using":[38,60,64,78,168],"mixed-bandwidth":[39,85,112,152,156],"training":[40,86,109,157],"data":[41,113,128,158],"to":[42,150],"improve":[43],"wideband":[44,142],"accuracy":[47,76,139],"the":[49,57,65,84,131,141,147],"CD-DNN-HMM":[50,149,159],"framework.":[51],"We":[52],"show":[53],"DNNs":[55],"provide":[56],"flexibility":[58],"arbitrary":[61],"features.":[62],"By":[63,154],"Mel-scale":[66],"log-filter":[67],"bank":[68],"features":[69],"not":[71,134],"only":[72,135],"achieve":[73],"higher":[74,137],"than":[77],"MFCCs,":[79],"but":[80,144],"also":[81,145],"can":[82],"formulate":[83],"problem":[87],"as":[88],"missing":[90],"feature":[91,96],"problem,":[92],"which":[94,164],"several":[95],"dimensions":[97],"have":[98],"no":[99,118],"value":[100],"when":[101],"narrowband":[102,169],"presented.":[105],"This":[106],"treatment":[107],"makes":[108],"CD-DNN-HMMs":[110],"with":[111],"an":[114],"easy":[115],"task":[116],"since":[117],"bandwidth":[119],"extension":[120],"needed.":[122],"Our":[123],"experiments":[124],"on":[125],"voice":[126],"search":[127],"indicate":[129],"solution":[133],"provides":[136],"for":[140],"allows":[146],"same":[148],"recognize":[151],"speech.":[153],"exploiting":[155],"outperforms":[160],"fMPE+BMMI":[161],"trained":[162],"GMM-HMM,":[163],"cannot":[165],"benefit":[166],"from":[167],"data,":[170],"by":[171],"18.4%.":[172]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":15},{"year":2018,"cited_by_count":10},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":17},{"year":2014,"cited_by_count":16},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-04T06:10:10.580331","created_date":"2025-10-10T00:00:00"}
