{"id":"https://openalex.org/W2115585811","doi":"https://doi.org/10.1109/icassp.2011.5947394","title":"Improved models for Mandarin speech-to-text transcription","display_name":"Improved models for Mandarin speech-to-text transcription","publication_year":2011,"publication_date":"2011-05-01","ids":{"openalex":"https://openalex.org/W2115585811","doi":"https://doi.org/10.1109/icassp.2011.5947394","mag":"2115585811"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2011.5947394","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2011.5947394","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062446882","display_name":"Lori Lamel","orcid":"https://orcid.org/0000-0001-7443-9938"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Lori Lamel","raw_affiliation_strings":["Spoken Language Processing Group, LIMSI, CNRS, Orsay, France","Spoken Language Processing Group, LIMSI-CNRS, 91403 Orsay, FRANCE"],"affiliations":[{"raw_affiliation_string":"Spoken Language Processing Group, LIMSI, CNRS, Orsay, France","institution_ids":["https://openalex.org/I4210115485","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"Spoken Language Processing Group, LIMSI-CNRS, 91403 Orsay, FRANCE","institution_ids":["https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109214406","display_name":"Jean\u2010Luc Gauvain","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Jean-Luc Gauvain","raw_affiliation_strings":["Spoken Language Processing Group, LIMSI, CNRS, Orsay, France","Spoken Language Processing Group, LIMSI-CNRS, 91403 Orsay, FRANCE"],"affiliations":[{"raw_affiliation_string":"Spoken Language Processing Group, LIMSI, CNRS, Orsay, France","institution_ids":["https://openalex.org/I4210115485","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"Spoken Language Processing Group, LIMSI-CNRS, 91403 Orsay, FRANCE","institution_ids":["https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110159187","display_name":"Viet Bac Le","orcid":"https://orcid.org/0000-0002-7267-6750"},"institutions":[{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Viet Bac Le","raw_affiliation_strings":["Spoken Language Processing Group, LIMSI, CNRS, Orsay, France","Spoken Language Processing Group, LIMSI-CNRS, 91403 Orsay, FRANCE"],"affiliations":[{"raw_affiliation_string":"Spoken Language Processing Group, LIMSI, CNRS, Orsay, France","institution_ids":["https://openalex.org/I4210115485","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"Spoken Language Processing Group, LIMSI-CNRS, 91403 Orsay, FRANCE","institution_ids":["https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035766151","display_name":"Ilya Oparin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ilya Oparin","raw_affiliation_strings":["Spoken Language Processing Group, LIMSI, CNRS, Orsay, France","Spoken Language Processing Group, LIMSI-CNRS, 91403 Orsay, FRANCE"],"affiliations":[{"raw_affiliation_string":"Spoken Language Processing Group, LIMSI, CNRS, Orsay, France","institution_ids":["https://openalex.org/I4210115485","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"Spoken Language Processing Group, LIMSI-CNRS, 91403 Orsay, FRANCE","institution_ids":["https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086853840","display_name":"Meng Sha","orcid":"https://orcid.org/0000-0003-4492-1189"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Sha Meng","raw_affiliation_strings":["Spoken Language Processing Group, LIMSI, CNRS, Orsay, France","Spoken Language Processing Group, LIMSI-CNRS, 91403 Orsay, FRANCE"],"affiliations":[{"raw_affiliation_string":"Spoken Language Processing Group, LIMSI, CNRS, Orsay, France","institution_ids":["https://openalex.org/I4210115485","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"Spoken Language Processing Group, LIMSI-CNRS, 91403 Orsay, FRANCE","institution_ids":["https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5062446882"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I4210115485"],"apc_list":null,"apc_paid":null,"fwci":6.8412,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.96794159,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4660","last_page":"4663"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.8513644933700562},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8057435750961304},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6437965631484985},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6028119325637817},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.58281409740448},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.556371808052063},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.536497175693512},{"id":"https://openalex.org/keywords/snippet","display_name":"Snippet","score":0.5095313191413879},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4763113260269165},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.4735371768474579},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.462425172328949},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.417472779750824},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1857644021511078},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.11400586366653442}],"concepts":[{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.8513644933700562},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8057435750961304},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6437965631484985},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6028119325637817},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.58281409740448},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.556371808052063},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.536497175693512},{"id":"https://openalex.org/C2777822670","wikidata":"https://www.wikidata.org/wiki/Q1120538","display_name":"Snippet","level":2,"score":0.5095313191413879},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4763113260269165},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.4735371768474579},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.462425172328949},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.417472779750824},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1857644021511078},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.11400586366653442},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2011.5947394","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2011.5947394","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5600000023841858}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W29925427","https://openalex.org/W44830073","https://openalex.org/W132821814","https://openalex.org/W177307080","https://openalex.org/W1491795195","https://openalex.org/W1492293509","https://openalex.org/W1507177964","https://openalex.org/W1520927580","https://openalex.org/W1603342183","https://openalex.org/W1970689298","https://openalex.org/W2016243284","https://openalex.org/W2026339097","https://openalex.org/W2096860695","https://openalex.org/W2106101143","https://openalex.org/W2121173708","https://openalex.org/W2128176022","https://openalex.org/W2142499181","https://openalex.org/W2152060930","https://openalex.org/W2159948109","https://openalex.org/W2167206253","https://openalex.org/W2261756304","https://openalex.org/W2296039685","https://openalex.org/W2437096199","https://openalex.org/W2594610113","https://openalex.org/W6601229365","https://openalex.org/W6605367993","https://openalex.org/W6607143959","https://openalex.org/W6615085950","https://openalex.org/W6629231598","https://openalex.org/W6629552394","https://openalex.org/W6635887616","https://openalex.org/W6657162565","https://openalex.org/W6697569992"],"related_works":["https://openalex.org/W2163874654","https://openalex.org/W50892825","https://openalex.org/W2594897229","https://openalex.org/W2151348424","https://openalex.org/W4221142855","https://openalex.org/W2050138804","https://openalex.org/W4290708361","https://openalex.org/W2129812225","https://openalex.org/W26527944","https://openalex.org/W2523799048"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"recent":[3],"advances":[4],"at":[5,70],"LIMSI":[6],"in":[7,19],"Mandarin":[8],"Chinese":[9],"speech-to-text":[10],"transcription.":[11],"A":[12],"number":[13],"of":[14,33,39,63,118],"novel":[15],"approaches":[16],"were":[17],"introduced":[18],"the":[20,98,121],"different":[21,71],"system":[22,96],"components.":[23],"The":[24,94],"acoustic":[25],"models":[26,52],"are":[27,53],"trained":[28,54],"on":[29,55,107,120],"over":[30,59],"1600":[31],"hours":[32],"audio":[34],"data":[35,113],"from":[36],"a":[37,116],"range":[38],"sources,":[40],"and":[41,44,48,65,111,125],"include":[42],"pitch":[43],"MLP":[45],"features.":[46],"N-gram":[47],"neural":[49],"network":[50],"language":[51],"very":[56],"large":[57],"corpora,":[58],"3":[60],"billion":[61],"words":[62],"texts;":[64],"LM":[66],"adaptation":[67,72],"was":[68,85],"explored":[69],"levels:":[73],"per":[74,76,79],"show,":[75],"snippet,":[77],"or":[78],"speaker":[80],"cluster.":[81],"Character-based":[82],"consensus":[83,90],"decoding":[84,91],"found":[86],"to":[87],"outperform":[88],"word-based":[89],"for":[92],"Mandarin.":[93],"improved":[95],"reduces":[97],"relative":[99],"character":[100],"error":[101],"rate":[102],"(CER)":[103],"by":[104],"about":[105],"10%":[106],"previous":[108],"GALE":[109],"development":[110],"evaluation":[112,128],"sets,":[114],"obtaining":[115],"CER":[117],"9.2%":[119],"P4":[122],"broadcast":[123,126],"news":[124],"conversation":[127],"data.":[129]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
