{"id":"https://openalex.org/W4399378048","doi":"https://doi.org/10.1109/tcsvt.2024.3409728","title":"MASA: Motion-Aware Masked Autoencoder With Semantic Alignment for Sign Language Recognition","display_name":"MASA: Motion-Aware Masked Autoencoder With Semantic Alignment for Sign Language Recognition","publication_year":2024,"publication_date":"2024-06-05","ids":{"openalex":"https://openalex.org/W4399378048","doi":"https://doi.org/10.1109/tcsvt.2024.3409728"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3409728","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3409728","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057827327","display_name":"Weichao Zhao","orcid":"https://orcid.org/0000-0001-7098-1690"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weichao Zhao","raw_affiliation_strings":["Department of Electronic Engineering and Information Science, CAS Key Laboratory of Technology in Geospatial Information Processing and Application System, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering and Information Science, CAS Key Laboratory of Technology in Geospatial Information Processing and Application System, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035950233","display_name":"Hezhen Hu","orcid":"https://orcid.org/0000-0003-0327-1562"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Hezhen Hu","raw_affiliation_strings":["Visual Informatics Group, University of Texas at Austin, Austin, TX, USA","Department of Electronic Engineering and Information Science, CAS Key Laboratory of Technology in Geospatial Information Processing and Application System, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Visual Informatics Group, University of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Department of Electronic Engineering and Information Science, CAS Key Laboratory of Technology in Geospatial Information Processing and Application System, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046805800","display_name":"Wengang Zhou","orcid":"https://orcid.org/0000-0003-1690-9836"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wengang Zhou","raw_affiliation_strings":["Department of Electronic Engineering and Information Science, CAS Key Laboratory of Technology in Geospatial Information Processing and Application System, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering and Information Science, CAS Key Laboratory of Technology in Geospatial Information Processing and Application System, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088650924","display_name":"Yunyao Mao","orcid":"https://orcid.org/0000-0002-9427-9086"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunyao Mao","raw_affiliation_strings":["Department of Electronic Engineering and Information Science, CAS Key Laboratory of Technology in Geospatial Information Processing and Application System, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering and Information Science, CAS Key Laboratory of Technology in Geospatial Information Processing and Application System, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100340979","display_name":"Min Wang","orcid":"https://orcid.org/0000-0003-3048-6980"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Min Wang","raw_affiliation_strings":["Hefei Comprehensive National Science Center, Institute of Artificial Intelligence, Hefei, China","Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Hefei Comprehensive National Science Center, Institute of Artificial Intelligence, Hefei, China","institution_ids":[]},{"raw_affiliation_string":"Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Hefei, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078141810","display_name":"Houqiang Li","orcid":"https://orcid.org/0000-0003-2188-3028"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Houqiang Li","raw_affiliation_strings":["Department of Electronic Engineering and Information Science, CAS Key Laboratory of Technology in Geospatial Information Processing and Application System, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering and Information Science, CAS Key Laboratory of Technology in Geospatial Information Processing and Application System, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5057827327"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":7.6812,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.97813936,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"34","issue":"11","first_page":"10793","last_page":"10804"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7772587537765503},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6824873685836792},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.6374858021736145},{"id":"https://openalex.org/keywords/sign-language","display_name":"Sign language","score":0.6108149290084839},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5217757821083069},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5137470960617065},{"id":"https://openalex.org/keywords/sign","display_name":"Sign (mathematics)","score":0.44866877794265747},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.43360504508018494},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4206576943397522},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3902484178543091},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.1652367115020752},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.14883869886398315},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10180017352104187}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7772587537765503},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6824873685836792},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.6374858021736145},{"id":"https://openalex.org/C522192633","wikidata":"https://www.wikidata.org/wiki/Q34228","display_name":"Sign language","level":2,"score":0.6108149290084839},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5217757821083069},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5137470960617065},{"id":"https://openalex.org/C139676723","wikidata":"https://www.wikidata.org/wiki/Q1193832","display_name":"Sign (mathematics)","level":2,"score":0.44866877794265747},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.43360504508018494},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4206576943397522},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3902484178543091},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.1652367115020752},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.14883869886398315},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10180017352104187},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3409728","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3409728","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G5038960961","display_name":null,"funder_award_id":"U20A20183","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6308808780","display_name":null,"funder_award_id":"62021001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322847","display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","ror":"https://ror.org/031141b54"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W1592197329","https://openalex.org/W1950788856","https://openalex.org/W1965483705","https://openalex.org/W1994616650","https://openalex.org/W2004074725","https://openalex.org/W2020163092","https://openalex.org/W2113912885","https://openalex.org/W2121341364","https://openalex.org/W2157548127","https://openalex.org/W2321533354","https://openalex.org/W2326925005","https://openalex.org/W2842511635","https://openalex.org/W2883429621","https://openalex.org/W2891726870","https://openalex.org/W2895638065","https://openalex.org/W2896457183","https://openalex.org/W2944068606","https://openalex.org/W2963076818","https://openalex.org/W2963369114","https://openalex.org/W2963465221","https://openalex.org/W2963524571","https://openalex.org/W2963820951","https://openalex.org/W2972662547","https://openalex.org/W2990152177","https://openalex.org/W3009828227","https://openalex.org/W3033228789","https://openalex.org/W3034269985","https://openalex.org/W3035524453","https://openalex.org/W3046544838","https://openalex.org/W3046952127","https://openalex.org/W3081334315","https://openalex.org/W3108425892","https://openalex.org/W3135189994","https://openalex.org/W3139022918","https://openalex.org/W3145385912","https://openalex.org/W3173262825","https://openalex.org/W3176780013","https://openalex.org/W3184385152","https://openalex.org/W3193694068","https://openalex.org/W3196466542","https://openalex.org/W3202747033","https://openalex.org/W3206385822","https://openalex.org/W3206600927","https://openalex.org/W3208937263","https://openalex.org/W4205517074","https://openalex.org/W4206341720","https://openalex.org/W4230527737","https://openalex.org/W4313156423","https://openalex.org/W4366377773","https://openalex.org/W4367047590","https://openalex.org/W4382240029","https://openalex.org/W4384787507","https://openalex.org/W4385245566","https://openalex.org/W4385800861","https://openalex.org/W4386065787","https://openalex.org/W4386071548","https://openalex.org/W4386075496","https://openalex.org/W4386076385","https://openalex.org/W4389104669","https://openalex.org/W4390189731","https://openalex.org/W4390871931","https://openalex.org/W4390872802","https://openalex.org/W4400527798","https://openalex.org/W6747899497","https://openalex.org/W6756891207","https://openalex.org/W6757817989","https://openalex.org/W6782324824","https://openalex.org/W6804160461","https://openalex.org/W6810265253"],"related_works":["https://openalex.org/W3013693939","https://openalex.org/W2159052453","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2734887215","https://openalex.org/W4297051394","https://openalex.org/W2752972570","https://openalex.org/W2145836866","https://openalex.org/W2803255133","https://openalex.org/W1989687946"],"abstract_inverted_index":{"Sign":[0],"language":[1,213],"recognition":[2],"(SLR)":[3],"has":[4],"long":[5],"been":[6],"plagued":[7],"by":[8,28,178],"insufficient":[9],"model":[10],"representation":[11,63],"capabilities.":[12],"Although":[13],"current":[14],"pre-training":[15],"approaches":[16],"have":[17],"alleviated":[18],"this":[19,90,196],"dilemma":[20],"to":[21,57,150,220],"some":[22],"extent":[23],"and":[24,61,107,130,206],"yielded":[25],"promising":[26],"performance":[27,230],"employing":[29],"various":[30],"pretext":[31,54],"tasks":[32],"on":[33,69,231],"sign":[34,75,164,212],"pose":[35,76,165],"data,":[36],"these":[37],"methods":[38,67],"still":[39],"suffer":[40],"from":[41,186],"two":[42,121],"primary":[43],"limitations:":[44],"i)":[45],"Explicit":[46],"motion":[47,105,152,161,204],"information":[48,59,110],"is":[49],"usually":[50],"disregarded":[51],"in":[52,111,138,168,190],"previous":[53],"tasks,":[55],"leading":[56],"partial":[58],"loss":[60],"limited":[62],"capability.":[64],"ii)":[65],"Previous":[66],"focus":[68],"the":[70,80,83,180,187,191,222],"local":[71,203],"context":[72],"of":[73,82,86,154,182,224],"a":[74,94,112,125,131,146],"sequence,":[77],"without":[78],"incorporating":[79],"guidance":[81],"global":[84,108,175,207],"meaning":[85],"lexical":[87],"signs.":[88],"To":[89],"end,":[91],"we":[92,140,170,216],"propose":[93],"Motion-Aware":[95],"masked":[96,127,148,155],"autoencoder":[97,128,143],"with":[98,145,174],"Semantic":[99],"Alignment":[100],"(MASA)":[101],"that":[102],"integrates":[103],"rich":[104],"cues":[106,162,205],"semantic":[109,133,176,208],"self-supervised":[113],"learning":[114],"paradigm":[115],"for":[116,210],"SLR.":[117],"Our":[118],"framework":[119,173,199],"contains":[120],"crucial":[122],"components,":[123],"i.e.,":[124],"motion-aware":[126,147],"(MA)":[129],"momentum":[132],"alignment":[134],"module":[135],"(SA).":[136],"Specifically,":[137],"MA,":[139],"introduce":[141],"an":[142],"architecture":[144],"strategy":[149],"reconstruct":[151],"residuals":[153],"frames,":[156],"thereby":[157],"explicitly":[158],"exploring":[159],"dynamic":[160],"among":[163],"sequences.":[166],"Moreover,":[167],"SA,":[169],"embed":[171],"our":[172,198,225],"awareness":[177],"aligning":[179],"embeddings":[181],"different":[183],"augmented":[184],"samples":[185],"input":[188],"sequence":[189],"shared":[192],"latent":[193],"space.":[194],"In":[195],"way,":[197],"can":[200],"simultaneously":[201],"learn":[202],"features":[209],"comprehensive":[211],"representation.":[214],"Furthermore,":[215],"conduct":[217],"extensive":[218],"experiments":[219],"validate":[221],"effectiveness":[223],"method,":[226],"achieving":[227],"new":[228],"state-of-the-art":[229],"four":[232],"public":[233],"benchmarks.":[234],"The":[235],"source":[236],"code":[237],"are":[238],"publicly":[239],"available":[240],"at":[241],"<uri":[242],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[243],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/sakura/MASA</uri>.":[244]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":4}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
