{"id":"https://openalex.org/W7123359464","doi":"https://doi.org/10.1109/lsp.2026.3653403","title":"Multimodal Cosine Similarity Transformer for Gloss-Guided Sign Language Recognition","display_name":"Multimodal Cosine Similarity Transformer for Gloss-Guided Sign Language Recognition","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7123359464","doi":"https://doi.org/10.1109/lsp.2026.3653403"},"language":null,"primary_location":{"id":"doi:10.1109/lsp.2026.3653403","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2026.3653403","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122906236","display_name":"Lu Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210110558","display_name":"Xi'an Technological University","ror":"https://ror.org/01t8prc81","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210110558"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lu Li","raw_affiliation_strings":["Department of Mechanical and Electrical Engineering, Xi&#x2019;an Technological University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical and Electrical Engineering, Xi&#x2019;an Technological University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I4210110558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004725442","display_name":"Qinkun Xiao","orcid":"https://orcid.org/0000-0002-1392-3299"},"institutions":[{"id":"https://openalex.org/I4210110558","display_name":"Xi'an Technological University","ror":"https://ror.org/01t8prc81","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210110558"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinkun Xiao","raw_affiliation_strings":["Department of Electronic and Information Engineering, Xi&#x2019;an Technological University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic and Information Engineering, Xi&#x2019;an Technological University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I4210110558"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076715406","display_name":"Peiran Liu","orcid":"https://orcid.org/0000-0001-6229-0814"},"institutions":[{"id":"https://openalex.org/I4210110558","display_name":"Xi'an Technological University","ror":"https://ror.org/01t8prc81","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210110558"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peiran Liu","raw_affiliation_strings":["Department of Mechanical and Electrical Engineering, Xi&#x2019;an Technological University, Xi&#x2019;an, China"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical and Electrical Engineering, Xi&#x2019;an Technological University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I4210110558"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5122906236"],"corresponding_institution_ids":["https://openalex.org/I4210110558"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0890948,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"33","issue":null,"first_page":"673","last_page":"677"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.001500000013038516,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.00139999995008111,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5652999877929688},{"id":"https://openalex.org/keywords/sign-language","display_name":"Sign language","score":0.545199990272522},{"id":"https://openalex.org/keywords/discrete-cosine-transform","display_name":"Discrete cosine transform","score":0.4860000014305115},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.4788999855518341},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.46630001068115234},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4505999982357025},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.36309999227523804}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6949999928474426},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6017000079154968},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5652999877929688},{"id":"https://openalex.org/C522192633","wikidata":"https://www.wikidata.org/wiki/Q34228","display_name":"Sign language","level":2,"score":0.545199990272522},{"id":"https://openalex.org/C2221639","wikidata":"https://www.wikidata.org/wiki/Q2877","display_name":"Discrete cosine transform","level":3,"score":0.4860000014305115},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.4788999855518341},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.46630001068115234},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4505999982357025},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.40310001373291016},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.36309999227523804},{"id":"https://openalex.org/C178009071","wikidata":"https://www.wikidata.org/wiki/Q93344","display_name":"Trigonometric functions","level":2,"score":0.3630000054836273},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.3628999888896942},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.35929998755455017},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2621000111103058}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2026.3653403","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2026.3653403","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.430363267660141}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W2962858109","https://openalex.org/W2997931247","https://openalex.org/W3034765865","https://openalex.org/W3045196480","https://openalex.org/W3046952127","https://openalex.org/W3133226919","https://openalex.org/W3147467731","https://openalex.org/W3173290664","https://openalex.org/W3184215204","https://openalex.org/W3196466542","https://openalex.org/W3204447181","https://openalex.org/W4292265162","https://openalex.org/W4312910375","https://openalex.org/W4312988544","https://openalex.org/W4315473456","https://openalex.org/W4367047590","https://openalex.org/W4386065518","https://openalex.org/W4386071548","https://openalex.org/W4387844301","https://openalex.org/W4389523825","https://openalex.org/W4390873551","https://openalex.org/W4392607709","https://openalex.org/W4392903518","https://openalex.org/W4393154516","https://openalex.org/W4393160661","https://openalex.org/W4399167881","https://openalex.org/W4403534885","https://openalex.org/W4404838688"],"related_works":[],"abstract_inverted_index":{"Continuous":[0],"sign":[1],"language":[2],"recognition":[3],"(CSLR)":[4],"requires":[5],"fine-grained":[6],"alignment":[7,116],"between":[8],"visual":[9],"sequences":[10],"and":[11,23,42,49,86,101,111,117],"gloss":[12,53],"annotations":[13],"under":[14],"weak":[15],"supervision,":[16],"which":[17],"is":[18,80],"challenged":[19],"by":[20,82],"modality":[21],"heterogeneity":[22],"ambiguous":[24],"frame-to-gloss":[25],"correspondence.":[26],"We":[27],"propose":[28],"a":[29,56,83],"Multimodal":[30],"Cosine":[31],"Similarity":[32],"Transformer":[33,85],"(MMCST)":[34],"to":[35],"address":[36],"these":[37],"issues.":[38],"MMCST":[39,94],"integrates":[40],"RGB":[41],"keypoint":[43],"heatmap":[44],"features":[45],"via":[46],"gated":[47,108],"fusion,":[48,109],"aligns":[50],"them":[51],"with":[52,88],"embeddings":[54],"through":[55],"Gloss-Conditioned":[57],"Cosine-Normalized":[58],"Attention":[59],"(GCNA)":[60],"mechanism":[61],"that":[62,93],"stabilizes":[63],"cross-modal":[64],"alignment.":[65],"To":[66],"further":[67],"enhance":[68],"semantic":[69,115],"consistency,":[70],"we":[71],"introduce":[72],"Gloss-aware":[73],"Contrastive":[74],"Regularization":[75],"(GLCR).":[76],"The":[77],"fused":[78],"representation":[79],"modeled":[81],"cosine-similarity":[84],"decoded":[87],"CTC.":[89],"Experimental":[90],"results":[91],"show":[92],"achieves":[95],"consistent":[96],"improvements":[97],"over":[98],"strong":[99],"baselines,":[100],"ablation":[102],"studies":[103],"confirm":[104],"the":[105],"effectiveness":[106],"of":[107],"GCNA,":[110],"GLCR":[112],"in":[113],"improving":[114],"yielding":[118],"smoother":[119],"training":[120],"dynamics.":[121]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-01-14T00:00:00"}
