{"id":"https://openalex.org/W2395416438","doi":"https://doi.org/10.1109/icassp.2016.7472152","title":"An empirical exploration of CTC acoustic models","display_name":"An empirical exploration of CTC acoustic models","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2395416438","doi":"https://doi.org/10.1109/icassp.2016.7472152","mag":"2395416438"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2016.7472152","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472152","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003251887","display_name":"Yajie Miao","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yajie Miao","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088091232","display_name":"Mohammad Gowayyed","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohammad Gowayyed","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012173647","display_name":"Xingyu Na","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingyu Na","raw_affiliation_strings":["Chinese Academy of Sciences, Institute of Acoustics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Acoustics, Beijing, China","institution_ids":["https://openalex.org/I4210099069","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038062913","display_name":"Tom Ko","orcid":"https://orcid.org/0000-0002-5324-8961"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tom Ko","raw_affiliation_strings":["Huawei Noah's Ark Research Lab, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Huawei Noah's Ark Research Lab, Hong Kong, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085262529","display_name":"Florian Metze","orcid":"https://orcid.org/0000-0002-6663-8600"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Florian Metze","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023053982","display_name":"Alexander Waibel","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Waibel","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5003251887"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":22.2806,"has_fulltext":false,"cited_by_count":86,"citation_normalized_percentile":{"value":0.994017,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2623","last_page":"2627"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8208004236221313},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.5677484273910522},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5613110661506653},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.5574827790260315},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5549818873405457},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5519367456436157},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5485237836837769},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5046948194503784},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5042191743850708},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4923301935195923},{"id":"https://openalex.org/keywords/porting","display_name":"Porting","score":0.4902380108833313},{"id":"https://openalex.org/keywords/syllable","display_name":"Syllable","score":0.46565157175064087},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.4140235483646393},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36406123638153076},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3541008234024048},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3292575478553772}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8208004236221313},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.5677484273910522},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5613110661506653},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.5574827790260315},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5549818873405457},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5519367456436157},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5485237836837769},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5046948194503784},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5042191743850708},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4923301935195923},{"id":"https://openalex.org/C106251023","wikidata":"https://www.wikidata.org/wiki/Q851989","display_name":"Porting","level":3,"score":0.4902380108833313},{"id":"https://openalex.org/C109089402","wikidata":"https://www.wikidata.org/wiki/Q8188","display_name":"Syllable","level":2,"score":0.46565157175064087},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4140235483646393},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36406123638153076},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3541008234024048},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3292575478553772},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2016.7472152","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472152","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6399999856948853,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W567546468","https://openalex.org/W581956982","https://openalex.org/W1489125746","https://openalex.org/W1524333225","https://openalex.org/W1526236009","https://openalex.org/W1600744878","https://openalex.org/W1616590059","https://openalex.org/W1922655562","https://openalex.org/W2002342963","https://openalex.org/W2005708641","https://openalex.org/W2048526313","https://openalex.org/W2064675550","https://openalex.org/W2085628288","https://openalex.org/W2102113734","https://openalex.org/W2117671523","https://openalex.org/W2127141656","https://openalex.org/W2147768505","https://openalex.org/W2160815625","https://openalex.org/W2293634267","https://openalex.org/W2403524927","https://openalex.org/W2407648438","https://openalex.org/W2963211739","https://openalex.org/W2963920996","https://openalex.org/W6615969787","https://openalex.org/W6616837769","https://openalex.org/W6631362777","https://openalex.org/W6640090968","https://openalex.org/W6675365184","https://openalex.org/W6696934422","https://openalex.org/W6713292810","https://openalex.org/W6713982112"],"related_works":["https://openalex.org/W2356602486","https://openalex.org/W2351992668","https://openalex.org/W2324828474","https://openalex.org/W2374315191","https://openalex.org/W2391207559","https://openalex.org/W2384715785","https://openalex.org/W2349624418","https://openalex.org/W2064459023","https://openalex.org/W2384611437","https://openalex.org/W2347873412"],"abstract_inverted_index":{"The":[0],"connectionist":[1],"temporal":[2],"classification":[3],"(CTC)":[4],"loss":[5],"function":[6],"has":[7],"several":[8],"interesting":[9],"properties":[10],"relevant":[11],"for":[12,41,54,93],"automatic":[13],"speech":[14,31],"recognition":[15],"(ASR):":[16],"applied":[17,75],"on":[18,98],"top":[19],"of":[20,71,79,86,101],"deep":[21],"recurrent":[22],"neural":[23],"networks":[24],"(RNNs),":[25],"CTC":[26,45,104,116],"learns":[27],"the":[28,39,87],"alignments":[29],"between":[30],"frames":[32],"and":[33,90,124],"label":[34],"sequences":[35],"automatically,":[36],"which":[37],"removes":[38],"need":[40],"pre-generated":[42],"frame-level":[43],"labels.":[44],"systems":[46],"also":[47],"do":[48],"not":[49],"require":[50],"context":[51],"decision":[52],"trees":[53],"good":[55],"performance,":[56],"using":[57],"context-independent":[58],"(CI)":[59],"phonemes":[60],"or":[61],"characters":[62],"as":[63],"targets.":[64],"This":[65],"paper":[66],"presents":[67],"an":[68,83],"extensive":[69],"exploration":[70],"CTC-based":[72],"acoustic":[73],"models":[74,105],"to":[76,107,121],"a":[77],"variety":[78],"ASR":[80],"tasks,":[81],"including":[82],"empirical":[84],"study":[85],"optimal":[88],"configuration":[89],"architectural":[91],"variants":[92],"CTC.":[94],"We":[95],"observe":[96],"that":[97,115],"large":[99],"amounts":[100],"training":[102],"data,":[103],"tend":[106],"outperform":[108],"state-of-the-art":[109],"hybrid":[110],"approach.":[111],"Further":[112],"experiments":[113],"reveal":[114],"can":[117,125],"be":[118,126],"readily":[119],"ported":[120],"syllable-based":[122],"languages,":[123],"enhanced":[127],"by":[128],"employing":[129],"improved":[130],"feature":[131],"front-ends.":[132]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":10},{"year":2018,"cited_by_count":17},{"year":2017,"cited_by_count":21},{"year":2016,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
