{"id":"https://openalex.org/W4205483237","doi":"https://doi.org/10.1109/bigdata52589.2021.9671417","title":"A Re-thinking ASR Modeling Framework using Attention Mechanisms","display_name":"A Re-thinking ASR Modeling Framework using Attention Mechanisms","publication_year":2021,"publication_date":"2021-12-15","ids":{"openalex":"https://openalex.org/W4205483237","doi":"https://doi.org/10.1109/bigdata52589.2021.9671417"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata52589.2021.9671417","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671417","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012877091","display_name":"Chih-Ying Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I154864474","display_name":"National Taiwan University of Science and Technology","ror":"https://ror.org/00q09pe49","country_code":"TW","type":"education","lineage":["https://openalex.org/I154864474"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Chih-Ying Yang","raw_affiliation_strings":["National Taiwan University of Science and Technology, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University of Science and Technology, Taipei, Taiwan","institution_ids":["https://openalex.org/I154864474"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5115603153","display_name":"Kuan\u2010Yu Chen","orcid":"https://orcid.org/0000-0002-6036-2199"},"institutions":[{"id":"https://openalex.org/I154864474","display_name":"National Taiwan University of Science and Technology","ror":"https://ror.org/00q09pe49","country_code":"TW","type":"education","lineage":["https://openalex.org/I154864474"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Kuan-Yu Chen","raw_affiliation_strings":["National Taiwan University of Science and Technology, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University of Science and Technology, Taipei, Taiwan","institution_ids":["https://openalex.org/I154864474"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5012877091"],"corresponding_institution_ids":["https://openalex.org/I154864474"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18171621,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4530","last_page":"4536"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8141169548034668},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6956655979156494},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.6730815172195435},{"id":"https://openalex.org/keywords/extractor","display_name":"Extractor","score":0.6600170135498047},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.6312256455421448},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.525988757610321},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.5023093223571777},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.48162931203842163},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.47255584597587585},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43134674429893494},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.4201028048992157},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.22234293818473816},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08218708634376526},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.062350064516067505}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8141169548034668},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6956655979156494},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.6730815172195435},{"id":"https://openalex.org/C117978034","wikidata":"https://www.wikidata.org/wiki/Q5422192","display_name":"Extractor","level":2,"score":0.6600170135498047},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.6312256455421448},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.525988757610321},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.5023093223571777},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.48162931203842163},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.47255584597587585},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43134674429893494},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.4201028048992157},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.22234293818473816},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08218708634376526},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.062350064516067505},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C126322002","wikidata":"https://www.wikidata.org/wiki/Q11180","display_name":"Internal medicine","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata52589.2021.9671417","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671417","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322108","display_name":"Ministry of Science and Technology","ror":"https://ror.org/032e49973"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":65,"referenced_works":["https://openalex.org/W179875071","https://openalex.org/W854541894","https://openalex.org/W2097333193","https://openalex.org/W2127141656","https://openalex.org/W2133564696","https://openalex.org/W2143612262","https://openalex.org/W2171928131","https://openalex.org/W2327501763","https://openalex.org/W2526425061","https://openalex.org/W2577366047","https://openalex.org/W2746192915","https://openalex.org/W2752047430","https://openalex.org/W2766219058","https://openalex.org/W2888779557","https://openalex.org/W2892009249","https://openalex.org/W2896457183","https://openalex.org/W2911291251","https://openalex.org/W2916997151","https://openalex.org/W2962780374","https://openalex.org/W2963240019","https://openalex.org/W2963362078","https://openalex.org/W2965373594","https://openalex.org/W2970597249","https://openalex.org/W2972650231","https://openalex.org/W2972818416","https://openalex.org/W2973122799","https://openalex.org/W2976556660","https://openalex.org/W3007328579","https://openalex.org/W3008037978","https://openalex.org/W3011339933","https://openalex.org/W3015752032","https://openalex.org/W3016167541","https://openalex.org/W3028382961","https://openalex.org/W3033210410","https://openalex.org/W3097829404","https://openalex.org/W3097874139","https://openalex.org/W3133764785","https://openalex.org/W3137963805","https://openalex.org/W3141464856","https://openalex.org/W3156902660","https://openalex.org/W3180465246","https://openalex.org/W3211848854","https://openalex.org/W4206706211","https://openalex.org/W4252331534","https://openalex.org/W4294619417","https://openalex.org/W4385245566","https://openalex.org/W6623517193","https://openalex.org/W6674571003","https://openalex.org/W6679434410","https://openalex.org/W6723310180","https://openalex.org/W6732447497","https://openalex.org/W6739901393","https://openalex.org/W6743726175","https://openalex.org/W6747158283","https://openalex.org/W6754299077","https://openalex.org/W6755207826","https://openalex.org/W6763701032","https://openalex.org/W6766673545","https://openalex.org/W6777859140","https://openalex.org/W6779248606","https://openalex.org/W6788556936","https://openalex.org/W6791086126","https://openalex.org/W6791705549","https://openalex.org/W6794920533","https://openalex.org/W6798187346"],"related_works":["https://openalex.org/W2529301793","https://openalex.org/W2384121599","https://openalex.org/W2038083449","https://openalex.org/W3177678247","https://openalex.org/W1999617572","https://openalex.org/W2944572343","https://openalex.org/W2333799855","https://openalex.org/W2351687372","https://openalex.org/W2004087835","https://openalex.org/W2314871050"],"abstract_inverted_index":{"Several":[0],"reasons":[1],"have":[2,31],"led":[3],"to":[4,34,54,101,105,115],"the":[5,36,40,56,60,67,75,80,103,117,121,125],"widespread":[6],"adoption":[7],"of":[8,62,74],"neural-based":[9],"algorithms":[10],"for":[11],"end-to-end":[12],"automatic":[13],"speech":[14,77],"recognition":[15,37],"(ASR),":[16],"including":[17],"their":[18],"high":[19],"performance,":[20],"elegant":[21],"model":[22,129],"designs,":[23],"and":[24,66,71,92,133],"parallel":[25],"computing":[26],"capabilities.":[27],"Numerous":[28],"ASR":[29,50,82,128],"models":[30],"been":[32],"proposed":[33,118,126],"improve":[35],"results,":[38],"but":[39],"gains":[41],"are":[42,98],"still":[43],"insufficient.":[44],"This":[45],"paper":[46],"proposes":[47],"a":[48,63,84,88,93],"re-thinking":[49,81,127],"model,":[51,83],"which":[52],"aims":[53],"bridge":[55],"gap":[57],"by":[58],"rethinking":[59],"regularities":[61],"given":[64],"hypothesis":[65],"relationship":[68],"between":[69],"text-level":[70],"acoustic-level":[72],"characteristics":[73],"input":[76],"utterance.":[78],"For":[79],"mixed":[85],"attention":[86,90],"mechanism,":[87,91],"self-and-mixed":[89],"deep":[94],"acoustic":[95],"feature":[96],"extractor":[97],"meticulously":[99],"designed":[100],"enable":[102],"notion":[104],"be":[106],"realized.":[107],"A":[108],"publicly":[109],"available":[110],"benchmark":[111],"corpus":[112],"is":[113],"used":[114],"evaluate":[116],"model.":[119],"As":[120],"experimental":[122],"results":[123],"demonstrate,":[124],"can":[130],"provide":[131],"significant":[132],"consistent":[134],"improvements":[135],"over":[136],"popular":[137],"baseline":[138],"systems.":[139]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
