{"id":"https://openalex.org/W7131116057","doi":"https://doi.org/10.1109/iccvw69036.2025.00527","title":"AutoSign: Direct Pose-to-Text Translation for Continuous Sign Language Recognition","display_name":"AutoSign: Direct Pose-to-Text Translation for Continuous Sign Language Recognition","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W7131116057","doi":"https://doi.org/10.1109/iccvw69036.2025.00527"},"language":null,"primary_location":{"id":"doi:10.1109/iccvw69036.2025.00527","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00527","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078147076","display_name":"Samuel Ebimobowei Johnny","orcid":null},"institutions":[{"id":"https://openalex.org/I4210130200","display_name":"Carnegie Mellon University Africa","ror":"https://ror.org/02f33m021","country_code":"RW","type":"education","lineage":["https://openalex.org/I4210130200","https://openalex.org/I74973139"]}],"countries":["RW"],"is_corresponding":true,"raw_author_name":"Samuel Ebimobowei Johnny","raw_affiliation_strings":["Carnegie Mellon University Africa,Kigali,Rwanda"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University Africa,Kigali,Rwanda","institution_ids":["https://openalex.org/I4210130200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126624466","display_name":"Blessed Guda","orcid":null},"institutions":[{"id":"https://openalex.org/I4210130200","display_name":"Carnegie Mellon University Africa","ror":"https://ror.org/02f33m021","country_code":"RW","type":"education","lineage":["https://openalex.org/I4210130200","https://openalex.org/I74973139"]}],"countries":["RW"],"is_corresponding":false,"raw_author_name":"Blessed Guda","raw_affiliation_strings":["Carnegie Mellon University Africa,Kigali,Rwanda"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University Africa,Kigali,Rwanda","institution_ids":["https://openalex.org/I4210130200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112940094","display_name":"Andrew T. Stephen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210130200","display_name":"Carnegie Mellon University Africa","ror":"https://ror.org/02f33m021","country_code":"RW","type":"education","lineage":["https://openalex.org/I4210130200","https://openalex.org/I74973139"]}],"countries":["RW"],"is_corresponding":false,"raw_author_name":"Andrew Blayama Stephen","raw_affiliation_strings":["Carnegie Mellon University Africa,Kigali,Rwanda"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University Africa,Kigali,Rwanda","institution_ids":["https://openalex.org/I4210130200"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062949271","display_name":"Assane Gu\u00e8ye","orcid":null},"institutions":[{"id":"https://openalex.org/I4210130200","display_name":"Carnegie Mellon University Africa","ror":"https://ror.org/02f33m021","country_code":"RW","type":"education","lineage":["https://openalex.org/I4210130200","https://openalex.org/I74973139"]}],"countries":["RW"],"is_corresponding":false,"raw_author_name":"Assane Gueye","raw_affiliation_strings":["Carnegie Mellon University Africa,Kigali,Rwanda"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University Africa,Kigali,Rwanda","institution_ids":["https://openalex.org/I4210130200"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5078147076"],"corresponding_institution_ids":["https://openalex.org/I4210130200"],"apc_list":null,"apc_paid":null,"fwci":1.9742,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.90371363,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"5071","last_page":"5078"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11285","display_name":"Hearing Impairment and Communication","score":0.0019000000320374966,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.0010999999940395355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.7379000186920166},{"id":"https://openalex.org/keywords/sign-language","display_name":"Sign language","score":0.5967000126838684},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5350000262260437},{"id":"https://openalex.org/keywords/gesture-recognition","display_name":"Gesture recognition","score":0.5153999924659729},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.4740000069141388},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4309999942779541},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.39430001378059387},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.36309999227523804},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3400999903678894}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7922000288963318},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.7379000186920166},{"id":"https://openalex.org/C522192633","wikidata":"https://www.wikidata.org/wiki/Q34228","display_name":"Sign language","level":2,"score":0.5967000126838684},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5960999727249146},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5879999995231628},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5350000262260437},{"id":"https://openalex.org/C159437735","wikidata":"https://www.wikidata.org/wiki/Q1519524","display_name":"Gesture recognition","level":3,"score":0.5153999924659729},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.4740000069141388},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4316999912261963},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4309999942779541},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.39430001378059387},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.36309999227523804},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3391000032424927},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.33149999380111694},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.31369999051094055},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.3077999949455261},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.29159998893737793},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.2632000148296356},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccvw69036.2025.00527","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00527","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4448975920677185}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2188882108","https://openalex.org/W2755802490","https://openalex.org/W2808328968","https://openalex.org/W2941870244","https://openalex.org/W2962730651","https://openalex.org/W2997931247","https://openalex.org/W3009828227","https://openalex.org/W3024060113","https://openalex.org/W3108425892","https://openalex.org/W3133226919","https://openalex.org/W3135189994","https://openalex.org/W3147467731","https://openalex.org/W3173262825","https://openalex.org/W3173290664","https://openalex.org/W3184215204","https://openalex.org/W3200751609","https://openalex.org/W3202747033","https://openalex.org/W3203359574","https://openalex.org/W4225286481","https://openalex.org/W4312439137","https://openalex.org/W4318940682","https://openalex.org/W4321375307","https://openalex.org/W4323314027","https://openalex.org/W4324135644","https://openalex.org/W4372260128","https://openalex.org/W4382240684","https://openalex.org/W4386071548","https://openalex.org/W4386076650","https://openalex.org/W4386170339","https://openalex.org/W4390873469","https://openalex.org/W4390873551","https://openalex.org/W4390878798","https://openalex.org/W4392904699","https://openalex.org/W4394625882","https://openalex.org/W4398763293","https://openalex.org/W4402672102","https://openalex.org/W4404838688","https://openalex.org/W7129088122","https://openalex.org/W7131104084"],"related_works":[],"abstract_inverted_index":{"Continuously":[0],"recognizing":[1,25],"sign":[2],"gestures":[3,33,195],"and":[4,20,26,31,88,140,193],"converting":[5],"them":[6],"to":[7,94,116,134,169,181,222,228],"glosses":[8,71,142],"plays":[9],"a":[10,39,44,162,177],"key":[11],"role":[12],"in":[13,156,224],"bridging":[14],"the":[15,18,28,35,95,132,138,141,144,153,157,197,206,214,229],"gap":[16],"between":[17,137],"hearing":[19],"hearing-impaired":[21],"communities.":[22],"This":[23],"involves":[24,43],"interpreting":[27],"hands,":[29],"face,":[30],"body":[32,194],"of":[34,46,127,220],"signer,":[36],"which":[37],"pose":[38,114,172],"challenge":[40],"as":[41],"it":[42],"combination":[45],"all":[47],"these":[48,78,102],"features.":[49],"Continuous":[50],"Sign":[51],"Language":[52],"Recognition":[53],"(CSLR)":[54],"methods":[55,80],"rely":[56],"on":[57,213],"multi-stage":[58,207],"pipelines":[59],"that":[60,111,191],"first":[61],"extract":[62],"visual":[63],"features,":[64],"then":[65],"align":[66],"variable-length":[67],"sequences":[68,115],"with":[69,90],"target":[70],"using":[72,166],"CTC":[73,147],"or":[74],"HMM-based":[75],"approaches.":[76],"However,":[77],"alignment-based":[79],"suffer":[81],"from":[82],"error":[83],"propagation":[84],"across":[85],"stages,":[86],"overfitting,":[87],"struggle":[89],"vocabulary":[91],"scalability":[92],"due":[93],"intermediate":[96],"gloss":[97],"representation":[98],"bottleneck.":[99],"To":[100],"address":[101],"limitations,":[103],"we":[104,189],"propose":[105],"AutoSign,":[106],"an":[107,218],"autoregressive":[108],"decoder-only":[109,129],"transformer":[110],"directly":[112,135,151],"translates":[113],"natural":[117],"language":[118],"text,":[119],"bypassing":[120],"traditional":[121],"alignment":[122],"mechanisms":[123],"entirely.":[124],"The":[125],"use":[126],"this":[128],"approach":[130,160],"allows":[131],"model":[133],"map":[136],"features":[139,200],"without":[143],"need":[145],"for":[146,201],"loss":[148],"while":[149],"also":[150],"learning":[152],"textual":[154],"dependencies":[155],"glosses.":[158],"Our":[159],"incorporates":[161],"temporal":[163],"compression":[164],"module":[165],"1D":[167],"CNNs":[168],"efficiently":[170],"process":[171],"sequences,":[173],"followed":[174],"by":[175],"AraGPT2,":[176],"pretrained":[178],"Arabic":[179],"decoder,":[180],"generate":[182],"text":[183],"(glosses).":[184],"Through":[185],"comprehensive":[186],"ablation":[187],"studies,":[188],"demonstrate":[190],"hand":[192],"provide":[196],"most":[198],"discriminative":[199],"signer-independent":[202],"CSLR.":[203],"By":[204],"eliminating":[205],"pipeline,":[208],"AutoSign":[209],"achieves":[210],"substantial":[211],"improvements":[212],"Isharah-1000":[215],"dataset,":[216],"achieving":[217],"improvement":[219],"up":[221],"6.1%":[223],"WER":[225],"score":[226],"compared":[227],"best":[230],"existing":[231],"method.":[232]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-02-25T06:17:34.324206","created_date":"2026-02-24T00:00:00"}
