{"id":"https://openalex.org/W4375868832","doi":"https://doi.org/10.1109/icassp49357.2023.10095271","title":"Cross-Modal Mutual Learning for Cued Speech Recognition","display_name":"Cross-Modal Mutual Learning for Cued Speech Recognition","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375868832","doi":"https://doi.org/10.1109/icassp49357.2023.10095271"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095271","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095271","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033637712","display_name":"Lei Liu","orcid":"https://orcid.org/0000-0002-1332-9724"},"institutions":[{"id":"https://openalex.org/I4210099586","display_name":"Shenzhen Research Institute of Big Data","ror":"https://ror.org/00z1gwf89","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210099586"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lei Liu","raw_affiliation_strings":["The Chinese University of Hong Kong,Shenzhen Research Institute of Big Data,Shenzhen","The Hong Kong University of Science and Technology (Guangzhou)","Shenzhen Research Institute of Big Data, The Chinese University of Hong Kong, Shenzhen"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Shenzhen Research Institute of Big Data,Shenzhen","institution_ids":["https://openalex.org/I4210116924","https://openalex.org/I4210099586"]},{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou)","institution_ids":[]},{"raw_affiliation_string":"Shenzhen Research Institute of Big Data, The Chinese University of Hong Kong, Shenzhen","institution_ids":["https://openalex.org/I4210116924","https://openalex.org/I4210099586"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100418866","display_name":"Li Liu","orcid":"https://orcid.org/0000-0002-9121-5124"},"institutions":[{"id":"https://openalex.org/I4210099586","display_name":"Shenzhen Research Institute of Big Data","ror":"https://ror.org/00z1gwf89","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210099586"]},{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Liu","raw_affiliation_strings":["The Chinese University of Hong Kong,Shenzhen Research Institute of Big Data,Shenzhen","The Hong Kong University of Science and Technology (Guangzhou)"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Shenzhen Research Institute of Big Data,Shenzhen","institution_ids":["https://openalex.org/I4210116924","https://openalex.org/I4210099586"]},{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou)","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5033637712"],"corresponding_institution_ids":["https://openalex.org/I4210099586","https://openalex.org/I4210116924"],"apc_list":null,"apc_paid":null,"fwci":2.0697,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.87470517,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11285","display_name":"Hearing Impairment and Communication","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7619284391403198},{"id":"https://openalex.org/keywords/concatenation","display_name":"Concatenation (mathematics)","score":0.6818593740463257},{"id":"https://openalex.org/keywords/cued-speech","display_name":"Cued speech","score":0.6756246089935303},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.647355318069458},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6406992673873901},{"id":"https://openalex.org/keywords/codebook","display_name":"Codebook","score":0.5815883278846741},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.5548466444015503},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5134938359260559},{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.4840826094150543},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.47990334033966064},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4539858400821686},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3528759479522705},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.15542978048324585}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7619284391403198},{"id":"https://openalex.org/C87619178","wikidata":"https://www.wikidata.org/wiki/Q126002","display_name":"Concatenation (mathematics)","level":2,"score":0.6818593740463257},{"id":"https://openalex.org/C83195618","wikidata":"https://www.wikidata.org/wiki/Q590951","display_name":"Cued speech","level":2,"score":0.6756246089935303},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.647355318069458},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6406992673873901},{"id":"https://openalex.org/C127759330","wikidata":"https://www.wikidata.org/wiki/Q637416","display_name":"Codebook","level":2,"score":0.5815883278846741},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.5548466444015503},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5134938359260559},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.4840826094150543},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.47990334033966064},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4539858400821686},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3528759479522705},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.15542978048324585},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095271","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095271","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-135393","is_oa":false,"landing_page_url":"http://lbdiscover.ust.hk/uresolver?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:journal&rfr_id=info:sid/HKUST:SPI&rft.genre=article&rft.issn=1520-6149&rft.volume=&rft.issue=&rft.date=2023&rft.spage=&rft.aulast=Liu&rft.aufirst=Lei&rft.atitle=Cross-Modal+Mutual+Learning+for+Cued+Speech+Recognition&rft.title=ICASSP%2C+IEEE+International+Conference+on+Acoustics%2C+Speech+and+Signal+Processing+-+Proceedings","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5699999928474426}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1502200391","https://openalex.org/W2194775991","https://openalex.org/W2799813293","https://openalex.org/W2888888638","https://openalex.org/W2972504708","https://openalex.org/W2985525390","https://openalex.org/W2998687373","https://openalex.org/W3008402854","https://openalex.org/W3113399631","https://openalex.org/W3162293946","https://openalex.org/W3196404295","https://openalex.org/W3196826198","https://openalex.org/W4225685860","https://openalex.org/W4283798744","https://openalex.org/W4319586818","https://openalex.org/W4385245566","https://openalex.org/W6630165351","https://openalex.org/W6687483927","https://openalex.org/W6739901393"],"related_works":["https://openalex.org/W2293149949","https://openalex.org/W2026099691","https://openalex.org/W4284672201","https://openalex.org/W2377486419","https://openalex.org/W2943202426","https://openalex.org/W2950156284","https://openalex.org/W2736714427","https://openalex.org/W2163679795","https://openalex.org/W2137816434","https://openalex.org/W2017956276"],"abstract_inverted_index":{"Automatic":[0],"Cued":[1,15],"Speech":[2,16],"Recognition":[3],"(ACSR)":[4],"provides":[5],"an":[6],"intelligent":[7],"human-machine":[8],"interface":[9],"for":[10,29,61,128,141,148],"visual":[11],"communications,":[12],"where":[13],"the":[14,41,46,83,109,136,167],"(CS)":[17],"system":[18],"utilizes":[19],"lip":[20],"movements":[21],"and":[22,53,154],"hand":[23,51,54],"gestures":[24],"to":[25,77,94,115,166],"code":[26],"spoken":[27],"language":[28],"hearing-impaired":[30],"people.":[31],"Previous":[32],"ACSR":[33,140],"approaches":[34],"often":[35],"utilize":[36],"direct":[37],"feature":[38,62],"concatenation":[39],"as":[40],"main":[42],"fusion":[43],"paradigm.":[44],"However,":[45],"asynchronous":[47],"modalities":[48,93],"(i.e.,":[49,151],"lip,":[50],"shape":[52],"position)":[55],"in":[56],"CS":[57,126],"may":[58],"cause":[59],"interference":[60],"concatenation.":[63],"To":[64,131],"address":[65],"this":[66,134],"challenge,":[67],"we":[68,120],"propose":[69],"a":[70,97,122,170],"transformer":[71],"based":[72],"cross-modal":[73],"mutual":[74],"learning":[75],"framework":[76],"prompt":[78],"multi-modal":[79,117],"interaction.":[80],"Compared":[81],"with":[82,103],"vanilla":[84],"self-attention,":[85],"our":[86,132,160],"model":[87,161],"forces":[88],"modality-specific":[89],"information":[90],"of":[91,105],"different":[92,149],"pass":[95],"through":[96],"modality-invariant":[98],"codebook,":[99],"concatenating":[100],"linguistic":[101,111],"representations":[102],"tokens":[104],"each":[106],"modality.":[107],"Then":[108],"shared":[110],"knowledge":[112],"is":[113,135],"used":[114],"re-synchronize":[116],"sequences.":[118],"Moreover,":[119],"establish":[121],"novel":[123],"large-scale":[124],"multi-speaker":[125],"dataset":[127],"Mandarin":[129,142],"Chinese.":[130,143],"knowledge,":[133],"first":[137],"work":[138],"on":[139],"Extensive":[144],"experiments":[145],"are":[146],"conducted":[147],"languages":[150],"Chinese,":[152],"French,":[153],"British":[155],"English).":[156],"Results":[157],"demonstrate":[158],"that":[159],"exhibits":[162],"superior":[163],"recognition":[164],"performance":[165],"state-of-the-art":[168],"by":[169],"large":[171],"margin.":[172]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":3}],"updated_date":"2026-02-26T08:16:20.718346","created_date":"2025-10-10T00:00:00"}
