{"id":"https://openalex.org/W4391759831","doi":"https://doi.org/10.1109/taslp.2024.3363446","title":"Computation and Parameter Efficient Multi-Modal Fusion Transformer for Cued Speech Recognition","display_name":"Computation and Parameter Efficient Multi-Modal Fusion Transformer for Cued Speech Recognition","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391759831","doi":"https://doi.org/10.1109/taslp.2024.3363446"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3363446","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3363446","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059565360","display_name":"Lei Liu","orcid":"https://orcid.org/0000-0001-8109-5248"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I4210099586","display_name":"Shenzhen Research Institute of Big Data","ror":"https://ror.org/00z1gwf89","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210099586"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Lei Liu","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, China","The Hong Kong University of Science and Technology(Guangzhou), Guangzhou, China","Shenzhen Research Institute of Big Data, Shenzhen, Guangdong, China","Chinese University of Hong Kong, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]},{"raw_affiliation_string":"The Hong Kong University of Science and Technology(Guangzhou), Guangzhou, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]},{"raw_affiliation_string":"Shenzhen Research Institute of Big Data, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210099586"]},{"raw_affiliation_string":"Chinese University of Hong Kong, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418818","display_name":"Li Liu","orcid":"https://orcid.org/0000-0002-4497-0135"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Li Liu","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, China","The Hong Kong University of Science and Technology(Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]},{"raw_affiliation_string":"The Hong Kong University of Science and Technology(Guangzhou), Guangzhou, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":null},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]},{"id":"https://openalex.org/I180437899","display_name":"University of Bremen","ror":"https://ror.org/04ers2y35","country_code":"DE","type":"education","lineage":["https://openalex.org/I180437899"]},{"id":"https://openalex.org/I4210099586","display_name":"Shenzhen Research Institute of Big Data","ror":"https://ror.org/00z1gwf89","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210099586"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN","DE","SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["Shenzhen Research Institute of Big Data, School of Data Science, Chinese University of Hong Kong, Shenzhen, China","National University of Singapore, Singapore","University of Bremen, Bremen, Germany"],"affiliations":[{"raw_affiliation_string":"Shenzhen Research Institute of Big Data, School of Data Science, Chinese University of Hong Kong, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924","https://openalex.org/I4210099586"]},{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"University of Bremen, Bremen, Germany","institution_ids":["https://openalex.org/I180437899"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5059565360"],"corresponding_institution_ids":["https://openalex.org/I200769079","https://openalex.org/I4210099586","https://openalex.org/I4210116924","https://openalex.org/I889458895"],"apc_list":null,"apc_paid":null,"fwci":4.1949,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.94663696,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"32","issue":null,"first_page":"1559","last_page":"1572"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7578596472740173},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6367493867874146},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5401676297187805},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5228079557418823},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5117380619049072},{"id":"https://openalex.org/keywords/cued-speech","display_name":"Cued speech","score":0.49475187063217163},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4911334216594696},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4429202675819397},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.42806926369667053},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1319645345211029},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09456086158752441}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7578596472740173},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6367493867874146},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5401676297187805},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5228079557418823},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5117380619049072},{"id":"https://openalex.org/C83195618","wikidata":"https://www.wikidata.org/wiki/Q590951","display_name":"Cued speech","level":2,"score":0.49475187063217163},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4911334216594696},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4429202675819397},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.42806926369667053},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1319645345211029},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09456086158752441},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2024.3363446","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3363446","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-139598","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-139598","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.46000000834465027,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G939200457","display_name":null,"funder_award_id":"62101351","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":65,"referenced_works":["https://openalex.org/W430830562","https://openalex.org/W1972680065","https://openalex.org/W2084008764","https://openalex.org/W2115252128","https://openalex.org/W2115410550","https://openalex.org/W2194775991","https://openalex.org/W2799813293","https://openalex.org/W2888888638","https://openalex.org/W2890952074","https://openalex.org/W2985525390","https://openalex.org/W2998687373","https://openalex.org/W3008402854","https://openalex.org/W3015383493","https://openalex.org/W3015427680","https://openalex.org/W3033529678","https://openalex.org/W3034429256","https://openalex.org/W3035588244","https://openalex.org/W3035682985","https://openalex.org/W3085139254","https://openalex.org/W3092945658","https://openalex.org/W3096690837","https://openalex.org/W3106298483","https://openalex.org/W3113399631","https://openalex.org/W3151130473","https://openalex.org/W3162293946","https://openalex.org/W3163162786","https://openalex.org/W3191850102","https://openalex.org/W3196404295","https://openalex.org/W3196464216","https://openalex.org/W3196826198","https://openalex.org/W3196863408","https://openalex.org/W3197032408","https://openalex.org/W4225303417","https://openalex.org/W4225685860","https://openalex.org/W4288327876","https://openalex.org/W4312367758","https://openalex.org/W4320930577","https://openalex.org/W4323654151","https://openalex.org/W4375868832","https://openalex.org/W4376226279","https://openalex.org/W4385245566","https://openalex.org/W4385823053","https://openalex.org/W6630165351","https://openalex.org/W6634728284","https://openalex.org/W6677618333","https://openalex.org/W6681302627","https://openalex.org/W6731370813","https://openalex.org/W6757585730","https://openalex.org/W6761628794","https://openalex.org/W6764045775","https://openalex.org/W6771626834","https://openalex.org/W6774054309","https://openalex.org/W6779163297","https://openalex.org/W6779709467","https://openalex.org/W6780333223","https://openalex.org/W6781533629","https://openalex.org/W6783906718","https://openalex.org/W6783944145","https://openalex.org/W6788684253","https://openalex.org/W6789317445","https://openalex.org/W6791274649","https://openalex.org/W6803508786","https://openalex.org/W6809860847","https://openalex.org/W6810370753","https://openalex.org/W6811456798"],"related_works":["https://openalex.org/W2791807133","https://openalex.org/W4220960781","https://openalex.org/W2010535944","https://openalex.org/W2082933777","https://openalex.org/W2082371543","https://openalex.org/W3157864487","https://openalex.org/W3095039513","https://openalex.org/W3176549632","https://openalex.org/W62877638","https://openalex.org/W4304700937"],"abstract_inverted_index":{"Cued":[0,243],"Speech":[1,244],"(CS)":[2],"is":[3,176,229],"a":[4,92,114,153,164,171,224],"pure":[5],"visual":[6,35,50],"coding":[7],"method":[8,275],"used":[9],"by":[10,162],"hearing-impaired":[11,44],"people":[12,45],"that":[13,104],"combines":[14],"lip":[15,55],"reading":[16,56],"with":[17,264],"several":[18],"specific":[19],"hand":[20,58],"shapes":[21],"to":[22,33,46,77,98,106,178,231,283],"make":[23],"the":[24,61,79,100,107,119,123,144,180,184,192,206,211,217,233,278,288],"spoken":[25],"language":[26],"visible.":[27],"Automatic":[28],"CS":[29,53,89,254],"recognition":[30,138],"(ACSR)":[31],"seeks":[32],"transcribe":[34],"cues":[36],"of":[37,52,63,87,200,220,236],"speech":[38],"into":[39],"text,":[40],"which":[41],"can":[42],"help":[43],"communicate":[47],"effectively.":[48],"The":[49,238],"information":[51],"contains":[54],"and":[57,140,156,203,260,269,291],"cueing,":[59],"thus":[60],"fusion":[62,74,132,160,267,271],"them":[64],"plays":[65],"an":[66],"important":[67,181,218],"role":[68],"in":[69,83,126],"ACSR.":[70],"However,":[71],"most":[72],"previous":[73],"methods":[75,95,268],"struggle":[76],"capture":[78],"global":[80,120],"dependency":[81,121],"present":[82],"long":[84,124],"sequence":[85,125],"inputs":[86],"multi-modal":[88,127,131,159,185],"data.":[90],"As":[91],"result,":[93],"these":[94,149],"generally":[96],"fail":[97],"learn":[99],"effective":[101],"cross-modal":[102,208],"relationships":[103],"contribute":[105],"fusion.":[108],"Recently,":[109],"attentionbased":[110],"transformers":[111,133],"have":[112],"been":[113],"prevalent":[115],"idea":[116],"for":[117,143,210],"capturing":[118],"over":[122,197,216],"fusion,":[128],"but":[129],"existing":[130,253,265],"suffer":[134],"from":[135,183,281],"both":[136],"poor":[137],"accuracy":[139],"inefficient":[141],"computation":[142,155],"ACSR":[145,270],"task.":[146],"To":[147],"address":[148],"problems,":[150],"we":[151],"develop":[152],"novel":[154,165],"parameter":[157],"efficient":[158,207],"transformer":[161],"proposing":[163],"Token-Importance-Aware":[166],"Attention":[167],"mechanism":[168],"(TIAA),":[169],"where":[170],"token":[172],"utilization":[173],"rate":[174],"(TUR)":[175],"formulated":[177],"select":[179],"tokens":[182,199,219],"streams.":[186],"More":[187],"precisely,":[188],"TIAA":[189],"firstly":[190],"models":[191],"modality-specific":[193],"fine-grained":[194],"temporal":[195,214],"dependencies":[196,215],"all":[198,252],"each":[201],"modality,":[202],"then":[204],"learns":[205],"interaction":[209],"modality-shared":[212],"coarse-grained":[213],"different":[221],"modalities.":[222],"Besides,":[223],"lightweight":[225],"gated":[226],"hidden":[227],"projection":[228],"designed":[230],"control":[232],"feature":[234],"flows":[235],"TIAA.":[237],"resulting":[239],"model,":[240],"named":[241],"Economical":[242],"Fusion":[245],"Transformer":[246],"(EcoCued),":[247],"achieves":[248],"state-of-the-art":[249],"performance":[250],"on":[251],"datasets":[255],"(i.e.,":[256],"Mandarin":[257],"Chinese,":[258],"French,":[259],"British":[261],"CS),":[262],"compared":[263],"transformerbased":[266],"methods.":[272],"Notably,":[273],"our":[274],"dramatically":[276],"reduces":[277],"computational":[279],"complexity":[280],"O(T2)":[282],"O(T).":[284],"We":[285],"will":[286],"release":[287],"source":[289],"code":[290],"data":[292],"as":[293],"open":[294],"source.":[295]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":4}],"updated_date":"2026-03-31T07:56:22.981413","created_date":"2025-10-10T00:00:00"}
