{"id":"https://openalex.org/W4397039346","doi":"https://doi.org/10.1007/s40747-024-01451-x","title":"Sla-former: conformer using shifted linear attention for audio-visual speech recognition","display_name":"Sla-former: conformer using shifted linear attention for audio-visual speech recognition","publication_year":2024,"publication_date":"2024-05-18","ids":{"openalex":"https://openalex.org/W4397039346","doi":"https://doi.org/10.1007/s40747-024-01451-x"},"language":"en","primary_location":{"id":"doi:10.1007/s40747-024-01451-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01451-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01451-x.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01451-x.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017941903","display_name":"Yewei Xiao","orcid":"https://orcid.org/0000-0001-9689-3760"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4610292","display_name":"Xiangtan University","ror":"https://ror.org/00xsfaz62","country_code":"CN","type":"education","lineage":["https://openalex.org/I4610292"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yewei Xiao","raw_affiliation_strings":["Institute of Automation and Electronic Information, Xiangtan University, Hunan, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation and Electronic Information, Xiangtan University, Hunan, China","institution_ids":["https://openalex.org/I4610292","https://openalex.org/I4210094879"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052165312","display_name":"Jian Huang","orcid":"https://orcid.org/0009-0006-1898-3756"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4610292","display_name":"Xiangtan University","ror":"https://ror.org/00xsfaz62","country_code":"CN","type":"education","lineage":["https://openalex.org/I4610292"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Huang","raw_affiliation_strings":["Institute of Automation and Electronic Information, Xiangtan University, Hunan, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation and Electronic Information, Xiangtan University, Hunan, China","institution_ids":["https://openalex.org/I4610292","https://openalex.org/I4210094879"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031321326","display_name":"Xuanming Liu","orcid":"https://orcid.org/0000-0002-3191-6957"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4610292","display_name":"Xiangtan University","ror":"https://ror.org/00xsfaz62","country_code":"CN","type":"education","lineage":["https://openalex.org/I4610292"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanming Liu","raw_affiliation_strings":["Institute of Automation and Electronic Information, Xiangtan University, Hunan, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation and Electronic Information, Xiangtan University, Hunan, China","institution_ids":["https://openalex.org/I4610292","https://openalex.org/I4210094879"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110935377","display_name":"Aosu Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4610292","display_name":"Xiangtan University","ror":"https://ror.org/00xsfaz62","country_code":"CN","type":"education","lineage":["https://openalex.org/I4610292"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Aosu Zhu","raw_affiliation_strings":["Institute of Automation and Electronic Information, Xiangtan University, Hunan, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation and Electronic Information, Xiangtan University, Hunan, China","institution_ids":["https://openalex.org/I4610292","https://openalex.org/I4210094879"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5017941903"],"corresponding_institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I4610292"],"apc_list":{"value":1320,"currency":"GBP","value_usd":1619},"apc_paid":{"value":1320,"currency":"GBP","value_usd":1619},"fwci":1.0487,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.74331636,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"10","issue":"4","first_page":"5721","last_page":"5741"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.6794523000717163},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6494401097297668},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5034386515617371},{"id":"https://openalex.org/keywords/computational-intelligence","display_name":"Computational intelligence","score":0.4652553200721741},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4087565541267395},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36603498458862305},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.13084477186203003}],"concepts":[{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.6794523000717163},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6494401097297668},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5034386515617371},{"id":"https://openalex.org/C139502532","wikidata":"https://www.wikidata.org/wiki/Q1122090","display_name":"Computational intelligence","level":2,"score":0.4652553200721741},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4087565541267395},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36603498458862305},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.13084477186203003}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s40747-024-01451-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01451-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01451-x.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:161fb7353b394aadbfa77731cc1122d9","is_oa":true,"landing_page_url":"https://doaj.org/article/161fb7353b394aadbfa77731cc1122d9","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Complex & Intelligent Systems, Vol 10, Iss 4, Pp 5721-5741 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s40747-024-01451-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01451-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01451-x.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4397039346.pdf"},"referenced_works_count":70,"referenced_works":["https://openalex.org/W1954573324","https://openalex.org/W1981276685","https://openalex.org/W2121486117","https://openalex.org/W2144499799","https://openalex.org/W2194775991","https://openalex.org/W2255253655","https://openalex.org/W2281031972","https://openalex.org/W2517194566","https://openalex.org/W2550143307","https://openalex.org/W2551572271","https://openalex.org/W2556171197","https://openalex.org/W2740570963","https://openalex.org/W2752782242","https://openalex.org/W2766219058","https://openalex.org/W2890952074","https://openalex.org/W2949662773","https://openalex.org/W2963250244","https://openalex.org/W2963341071","https://openalex.org/W2963528589","https://openalex.org/W2963785710","https://openalex.org/W2963925437","https://openalex.org/W2964110616","https://openalex.org/W2972756321","https://openalex.org/W2972775954","https://openalex.org/W2981501041","https://openalex.org/W2996970093","https://openalex.org/W3006974783","https://openalex.org/W3007589762","https://openalex.org/W3015356123","https://openalex.org/W3015383493","https://openalex.org/W3015830103","https://openalex.org/W3015995734","https://openalex.org/W3016011581","https://openalex.org/W3034552680","https://openalex.org/W3035042697","https://openalex.org/W3035299099","https://openalex.org/W3097777922","https://openalex.org/W3104792420","https://openalex.org/W3119418740","https://openalex.org/W3147254695","https://openalex.org/W3162293946","https://openalex.org/W3163652268","https://openalex.org/W3163842642","https://openalex.org/W3177318507","https://openalex.org/W3197813307","https://openalex.org/W3199527474","https://openalex.org/W4205280640","https://openalex.org/W4221154745","https://openalex.org/W4224237294","https://openalex.org/W4224917001","https://openalex.org/W4225727438","https://openalex.org/W4242175757","https://openalex.org/W4248881089","https://openalex.org/W4283654390","https://openalex.org/W4297841641","https://openalex.org/W4307286264","https://openalex.org/W4308509827","https://openalex.org/W4309744061","https://openalex.org/W4312638101","https://openalex.org/W4312790276","https://openalex.org/W4317623085","https://openalex.org/W4319300051","https://openalex.org/W4320036893","https://openalex.org/W4320717244","https://openalex.org/W4372259771","https://openalex.org/W4372259858","https://openalex.org/W4392908934","https://openalex.org/W6600109629","https://openalex.org/W6600195168","https://openalex.org/W6601949647"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W2033914206","https://openalex.org/W2042327336"],"abstract_inverted_index":{"Abstract":[0],"Conformer-based":[1],"models":[2,31],"have":[3],"proven":[4],"highly":[5],"effective":[6],"in":[7,185],"Audio-visual":[8],"Speech":[9],"Recognition,":[10],"integrating":[11],"auditory":[12],"and":[13,38,109,166,174],"visual":[14],"inputs":[15],"to":[16,77],"significantly":[17],"enhance":[18],"speech":[19,187],"recognition":[20,188],"accuracy.":[21],"However,":[22],"the":[23,53,62,86,89,100,116,169],"widely":[24],"utilized":[25],"softmax":[26,78],"attention":[27,72],"mechanism":[28],"within":[29,135],"conformer":[30,63],"encounters":[32],"scalability":[33],"issues,":[34,97],"with":[35,43],"its":[36,182],"spatial":[37],"temporal":[39],"complexity":[40],"escalating":[41],"quadratically":[42],"sequence":[44],"length.":[45],"To":[46,94],"address":[47],"these":[48,96],"challenges,":[49],"this":[50],"paper":[51],"introduces":[52],"Shifted":[54,65],"Linear":[55,66],"Attention":[56,67],"Conformer,":[57],"an":[58,110,128],"evolved":[59],"iteration":[60],"of":[61,85,91,102,118,164],"architecture.":[64],"Conformer":[68],"adopts":[69],"shifted":[70],"linear":[71,92],"as":[73],"a":[74,82,103],"scalable":[75],"alternative":[76],"attention.":[79,93],"We":[80],"conducted":[81],"thorough":[83],"analysis":[84],"factors":[87],"constraining":[88],"efficiency":[90],"mitigate":[95],"we":[98,126],"propose":[99],"utilization":[101],"straightforward":[104],"yet":[105],"potent":[106],"mapping":[107],"function":[108],"efficient":[111],"rank":[112],"restoration":[113],"module,":[114],"enhancing":[115,139],"effectiveness":[117],"self-attention":[119],"while":[120],"maintaining":[121],"low":[122],"computational":[123],"complexity.":[124],"Furthermore,":[125],"integrate":[127],"advanced":[129],"attention-shifting":[130],"technique":[131],"facilitating":[132],"token":[133],"manipulation":[134],"attentional":[136],"mechanisms,":[137],"thereby":[138],"information":[140],"flow":[141],"across":[142],"various":[143],"groups.":[144],"This":[145],"three-part":[146],"approach":[147],"enhances":[148],"cognitive":[149],"computations,":[150],"particularly":[151],"beneficial":[152],"for":[153],"processing":[154],"longer":[155],"sequences.":[156],"Our":[157],"model":[158],"achieves":[159],"exceptional":[160],"Word":[161],"Error":[162],"Rates":[163],"1.9%":[165],"1.5%":[167],"on":[168],"Lip":[170,175],"Reading":[171,176],"Sentences":[172,177],"2":[173],"3":[178],"datasets,":[179],"respectively,":[180],"showcasing":[181],"state-of-the-art":[183],"performance":[184],"audio-visual":[186],"tasks.":[189]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-31T07:56:22.981413","created_date":"2025-10-10T00:00:00"}
