{"id":"https://openalex.org/W4281395113","doi":"https://doi.org/10.1007/s11042-022-12136-3","title":"A hybrid CTC+Attention model based on end-to-end framework for multilingual speech recognition","display_name":"A hybrid CTC+Attention model based on end-to-end framework for multilingual speech recognition","publication_year":2022,"publication_date":"2022-05-20","ids":{"openalex":"https://openalex.org/W4281395113","doi":"https://doi.org/10.1007/s11042-022-12136-3"},"language":"en","primary_location":{"id":"doi:10.1007/s11042-022-12136-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11042-022-12136-3","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11042-022-12136-3.pdf","source":{"id":"https://openalex.org/S110206669","display_name":"Multimedia Tools and Applications","issn_l":"1380-7501","issn":["1380-7501","1573-7721"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Multimedia Tools and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s11042-022-12136-3.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110916856","display_name":"Sendong Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I39854758","display_name":"Auckland University of Technology","ror":"https://ror.org/01zvqw119","country_code":"NZ","type":"education","lineage":["https://openalex.org/I39854758"]}],"countries":["NZ"],"is_corresponding":true,"raw_author_name":"Sendong Liang","raw_affiliation_strings":["School of Engineering, Computer & Mathematics, Auckland University of Technology, No. 31 Symonds Street, Auckland, 1010, New Zealand"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Computer & Mathematics, Auckland University of Technology, No. 31 Symonds Street, Auckland, 1010, New Zealand","institution_ids":["https://openalex.org/I39854758"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109048109","display_name":"Wei Qi Yan","orcid":"https://orcid.org/0000-0003-2573-0272"},"institutions":[{"id":"https://openalex.org/I39854758","display_name":"Auckland University of Technology","ror":"https://ror.org/01zvqw119","country_code":"NZ","type":"education","lineage":["https://openalex.org/I39854758"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Wei Qi Yan","raw_affiliation_strings":["School of Engineering, Computer & Mathematics, Auckland University of Technology, No. 31 Symonds Street, Auckland, 1010, New Zealand"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Computer & Mathematics, Auckland University of Technology, No. 31 Symonds Street, Auckland, 1010, New Zealand","institution_ids":["https://openalex.org/I39854758"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5110916856"],"corresponding_institution_ids":["https://openalex.org/I39854758"],"apc_list":null,"apc_paid":null,"fwci":1.6559,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.86095637,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"81","issue":"28","first_page":"41295","last_page":"41308"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9180855751037598},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.713690459728241},{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.6346076726913452},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6241695880889893},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5637389421463013},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.538032591342926},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.47705474495887756},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4686051607131958},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4539671540260315},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.42626678943634033},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40759405493736267},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.07263869047164917}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9180855751037598},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.713690459728241},{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.6346076726913452},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6241695880889893},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5637389421463013},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.538032591342926},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.47705474495887756},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4686051607131958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4539671540260315},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.42626678943634033},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40759405493736267},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.07263869047164917},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s11042-022-12136-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11042-022-12136-3","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11042-022-12136-3.pdf","source":{"id":"https://openalex.org/S110206669","display_name":"Multimedia Tools and Applications","issn_l":"1380-7501","issn":["1380-7501","1573-7721"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Multimedia Tools and Applications","raw_type":"journal-article"},{"id":"pmh:oai:openrepository.aut.ac.nz:10292/15153","is_oa":true,"landing_page_url":"https://hdl.handle.net/10292/15153","pdf_url":null,"source":{"id":"https://openalex.org/S4306401809","display_name":"Tuwhera (Auckland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I39854758","host_organization_name":"Auckland University of Technology","host_organization_lineage":["https://openalex.org/I39854758"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":{"id":"doi:10.1007/s11042-022-12136-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11042-022-12136-3","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11042-022-12136-3.pdf","source":{"id":"https://openalex.org/S110206669","display_name":"Multimedia Tools and Applications","issn_l":"1380-7501","issn":["1380-7501","1573-7721"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Multimedia Tools and Applications","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6899999976158142}],"awards":[],"funders":[{"id":"https://openalex.org/F4320310339","display_name":"Auckland University of Technology, New Zealand","ror":"https://ror.org/01zvqw119"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4281395113.pdf","grobid_xml":"https://content.openalex.org/works/W4281395113.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W179875071","https://openalex.org/W1494198834","https://openalex.org/W1525636403","https://openalex.org/W1977445474","https://openalex.org/W2041252889","https://openalex.org/W2058695628","https://openalex.org/W2084500694","https://openalex.org/W2114569717","https://openalex.org/W2122028591","https://openalex.org/W2122364000","https://openalex.org/W2136922672","https://openalex.org/W2155273149","https://openalex.org/W2158373110","https://openalex.org/W2526425061","https://openalex.org/W2786031273","https://openalex.org/W2795798228","https://openalex.org/W2890197052","https://openalex.org/W2919448963","https://openalex.org/W2936123380","https://openalex.org/W2962780374","https://openalex.org/W2963211739","https://openalex.org/W2963226322","https://openalex.org/W2963785710","https://openalex.org/W2963963943","https://openalex.org/W2964539095","https://openalex.org/W2991356003","https://openalex.org/W2995085615","https://openalex.org/W3007227084","https://openalex.org/W3015927303","https://openalex.org/W3089901025","https://openalex.org/W3094841848","https://openalex.org/W3110932043","https://openalex.org/W6600292188","https://openalex.org/W6605905859","https://openalex.org/W6631814027"],"related_works":["https://openalex.org/W2235458433","https://openalex.org/W1953068910","https://openalex.org/W2964648601","https://openalex.org/W2485759381","https://openalex.org/W2917344756","https://openalex.org/W1493946344","https://openalex.org/W4205868073","https://openalex.org/W2143620265","https://openalex.org/W3193318782","https://openalex.org/W3011988934"],"abstract_inverted_index":{"Speech":[0],"recognition":[1,18,65,126],"is":[2,22],"an":[3],"important":[4],"field":[5],"in":[6,87,113,134],"natural":[7],"language":[8,116],"processing.":[9],"In":[10,43,60],"this":[11,44,135],"paper,":[12,45],"the":[13,35,82,96,110,122,143,150,154],"end-to-end":[14,25,103],"framework":[15,104],"for":[16,67],"speech":[17,64,125],"with":[19,109,127],"multilingual":[20,88],"datasets":[21],"proposed.":[23],"The":[24,137],"methods":[26,66],"do":[27],"not":[28],"require":[29],"complicated":[30],"alignment":[31],"and":[32,53,72,78,117,157],"construction":[33],"of":[34,51,124,142],"pronunciation":[36],"dictionary,":[37],"which":[38],"show":[39],"a":[40,48,114],"promising":[41],"prospect.":[42],"we":[46,70,93],"implement":[47],"hybrid":[49,84,98,145],"model":[50,56,86,100,112,147,156],"CTC":[52],"attention":[54],"(CTC+Attention)":[55],"based":[57,101,148],"on":[58,102,149],"PyTorch.":[59],"order":[61],"to":[62,129],"compare":[63],"multiple":[68],"languages,":[69],"design":[71],"create":[73],"three":[74],"datasets:":[75],"Chinese,":[76],"English,":[77],"Code-Switch.":[79],"We":[80],"evaluate":[81],"proposed":[83,97,144],"CTC+Attention":[85,99,146],"environment.":[89,120],"Throughout":[90],"our":[91],"experiments,":[92],"find":[94],"that":[95],"achieves":[105],"better":[106],"performance":[107],"compared":[108,133],"HMM-DNN":[111],"single":[115],"Code-Switch":[118],"speaking":[119],"Moreover,":[121],"results":[123],"regard":[128],"different":[130],"languages":[131],"are":[132],"paper.":[136],"CER(i.e.,":[138],"Character":[139],"Error":[140],"Rate)":[141],"Chinese":[151],"dataset":[152],"defeated":[153],"traditional":[155],"reached":[158],"10.22%.":[159]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":5}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
