{"id":"https://openalex.org/W2972430050","doi":"https://doi.org/10.21437/interspeech.2019-1975","title":"Char+CV-CTC: Combining Graphemes and Consonant/Vowel Units for CTC-Based ASR Using Multitask Learning","display_name":"Char+CV-CTC: Combining Graphemes and Consonant/Vowel Units for CTC-Based ASR Using Multitask Learning","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2972430050","doi":"https://doi.org/10.21437/interspeech.2019-1975","mag":"2972430050"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-1975","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-1975","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032875806","display_name":"Abdelwahab Heba","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I134560555","display_name":"Universit\u00e9 Toulouse III - Paul Sabatier","ror":"https://ror.org/02v6kpv12","country_code":"FR","type":"education","lineage":["https://openalex.org/I134560555"]},{"id":"https://openalex.org/I4210149153","display_name":"Linagora (France)","ror":"https://ror.org/03pd1sq22","country_code":"FR","type":"company","lineage":["https://openalex.org/I4210149153"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Abdelwahab Heba","raw_affiliation_strings":["Linagora, France","IRIT, Universit\u00e9 Paul Sabatier, CNRS, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Linagora, France","institution_ids":["https://openalex.org/I4210149153"]},{"raw_affiliation_string":"IRIT, Universit\u00e9 Paul Sabatier, CNRS, France","institution_ids":["https://openalex.org/I134560555","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088037163","display_name":"Thomas Pellegrini","orcid":"https://orcid.org/0000-0001-8984-1399"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I134560555","display_name":"Universit\u00e9 Toulouse III - Paul Sabatier","ror":"https://ror.org/02v6kpv12","country_code":"FR","type":"education","lineage":["https://openalex.org/I134560555"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Thomas Pellegrini","raw_affiliation_strings":["IRIT, Universit\u00e9 Paul Sabatier, CNRS, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IRIT, Universit\u00e9 Paul Sabatier, CNRS, France","institution_ids":["https://openalex.org/I134560555","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056533367","display_name":"Jean-Pierre Lorr\u00e9","orcid":"https://orcid.org/0000-0002-1929-0728"},"institutions":[{"id":"https://openalex.org/I4210149153","display_name":"Linagora (France)","ror":"https://ror.org/03pd1sq22","country_code":"FR","type":"company","lineage":["https://openalex.org/I4210149153"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Jean-Pierre Lorr\u00e9","raw_affiliation_strings":["Linagora, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Linagora, France","institution_ids":["https://openalex.org/I4210149153"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040937351","display_name":"R\u00e9gine Andre-Obrecht","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I134560555","display_name":"Universit\u00e9 Toulouse III - Paul Sabatier","ror":"https://ror.org/02v6kpv12","country_code":"FR","type":"education","lineage":["https://openalex.org/I134560555"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"R\u00e9gine Andre-Obrecht","raw_affiliation_strings":["IRIT, Universit\u00e9 Paul Sabatier, CNRS, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IRIT, Universit\u00e9 Paul Sabatier, CNRS, France","institution_ids":["https://openalex.org/I134560555","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1446,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.58369887,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1611","last_page":"1615"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7330233454704285},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6520920991897583},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.5857145190238953},{"id":"https://openalex.org/keywords/vowel","display_name":"Vowel","score":0.5781314373016357},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5677573084831238},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5538027286529541},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.5268011689186096},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5259623527526855},{"id":"https://openalex.org/keywords/consonant","display_name":"Consonant","score":0.5254437327384949},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4602166414260864},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.194301038980484}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7330233454704285},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6520920991897583},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.5857145190238953},{"id":"https://openalex.org/C2779581591","wikidata":"https://www.wikidata.org/wiki/Q36244","display_name":"Vowel","level":2,"score":0.5781314373016357},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5677573084831238},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5538027286529541},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.5268011689186096},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5259623527526855},{"id":"https://openalex.org/C2778203577","wikidata":"https://www.wikidata.org/wiki/Q38035","display_name":"Consonant","level":3,"score":0.5254437327384949},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4602166414260864},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.194301038980484},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2019-1975","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-1975","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},{"id":"pmh:oai:oatao.univ-toulouse.fr:25028","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401987","display_name":"Open Archive Toulouse Archive Ouverte (University of Toulouse)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I17866349","host_organization_name":"Universit\u00e9 F\u00e9d\u00e9rale de Toulouse Midi-Pyr\u00e9n\u00e9es","host_organization_lineage":["https://openalex.org/I17866349"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:HAL:hal-02419431v1","is_oa":true,"landing_page_url":"https://hal.science/hal-02419431","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://interspeech2019.org/","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:oatao.univ-toulouse.fr:25028","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401987","display_name":"Open Archive Toulouse Archive Ouverte (University of Toulouse)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I17866349","host_organization_name":"Universit\u00e9 F\u00e9d\u00e9rale de Toulouse Midi-Pyr\u00e9n\u00e9es","host_organization_lineage":["https://openalex.org/I17866349"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5,"id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G8389395435","display_name":"Lightly-supervised and Unsupervised Discovery of Audio Units using Deep Learning","funder_award_id":"ANR-18-CE23-0005","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"}],"funders":[{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1526990717","https://openalex.org/W1847088711","https://openalex.org/W1942035323","https://openalex.org/W2046932483","https://openalex.org/W2095705004","https://openalex.org/W2102113734","https://openalex.org/W2127141656","https://openalex.org/W2143612262","https://openalex.org/W2295676751","https://openalex.org/W2507877159","https://openalex.org/W2526425061","https://openalex.org/W2603679025","https://openalex.org/W2752168051","https://openalex.org/W2884254529","https://openalex.org/W2884975363","https://openalex.org/W2899771611","https://openalex.org/W2953061907","https://openalex.org/W2962813140","https://openalex.org/W2963211739"],"related_works":["https://openalex.org/W1964706935","https://openalex.org/W2020946215","https://openalex.org/W2801025542","https://openalex.org/W1966580183","https://openalex.org/W2002773879","https://openalex.org/W2083136263","https://openalex.org/W4213283234","https://openalex.org/W4252296115","https://openalex.org/W2373394249","https://openalex.org/W2906310393"],"abstract_inverted_index":{"Previous":[0],"work":[1],"has":[2],"shown":[3],"that":[4,48],"end-to-end":[5],"neural-based":[6],"speech":[7,38],"recognition":[8,39],"systems":[9],"can":[10],"be":[11],"improved":[12],"by":[13,152],"adding":[14],"auxiliary":[15],"tasks":[16],"at":[17,40],"intermediate":[18],"layers.":[19],"In":[20],"this":[21],"paper,":[22],"we":[23,66],"report":[24],"multitask":[25],"learning":[26],"(MTL)":[27],"experiments":[28],"in":[29,103,155],"the":[30,71,78,90,95,104,108,115,123,141,161],"context":[31],"of":[32,107,164],"connectionist":[33],"temporal":[34],"classification":[35],"(CTC)":[36],"based":[37],"character":[41,72,80],"level.":[42],"We":[43],"compare":[44],"several":[45],"MTL":[46],"architectures":[47],"jointly":[49],"learn":[50],"to":[51,76,85],"predict":[52],"characters":[53,93],"(sometimes":[54],"called":[55],"graphemes)":[56],"and":[57,73,133,157],"consonant/vowel":[58],"(CV)":[59],"binary":[60],"labels.":[61],"The":[62,82],"best":[63,124],"approach,":[64],"which":[65],"call":[67],"Char+CV-CTC,":[68],"adds":[69],"up":[70],"CV":[74],"logits":[75],"obtain":[77],"final":[79],"predictions.":[81],"idea":[83],"is":[84,101],"put":[86],"more":[87],"weight":[88],"on":[89,114,140],"vowel":[91,96],"(consonant)":[92,97],"when":[94],"symbol":[98],"\u2018V\u2019":[99],"(\u2018C\u2019)":[100],"predicted":[102],"auxiliary-task":[105],"branch":[106],"network.":[109],"Experiments":[110],"were":[111],"carried":[112],"out":[113],"Wall":[116],"Street":[117],"Journal":[118],"(WSJ)":[119],"corpus.":[120],"Char+CV-CTC":[121],"achieved":[122,159],"ASR":[125],"results":[126],"with":[127],"a":[128,134,167],"2.2%":[129],"Character":[130],"Error":[131,137],"Rate":[132,138],"6.1%":[135],"Word":[136],"(WER)":[139],"Eval92":[142],"evaluation":[143],"subset.":[144],"This":[145],"model":[146,150],"outperformed":[147],"its":[148],"monotask":[149],"counterpart":[151],"0.7%":[153],"absolute":[154],"WER":[156],"also":[158],"almost":[160],"same":[162],"performance":[163],"6.0%":[165],"as":[166],"strong":[168],"baseline":[169],"phone-based":[170],"Time":[171],"Delay":[172],"Neural":[173],"Network":[174],"(\u201cTDNN-Phone+TR2\u201d)":[175],"model.":[176]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-20T22:02:38.213706","created_date":"2025-10-10T00:00:00"}
