{"id":"https://openalex.org/W4414654916","doi":"https://doi.org/10.1145/3742886.3756724","title":"LLM-Driven Multimodal Video-Text Fusion for Isolated Sign Language Recognition","display_name":"LLM-Driven Multimodal Video-Text Fusion for Isolated Sign Language Recognition","publication_year":2025,"publication_date":"2025-09-16","ids":{"openalex":"https://openalex.org/W4414654916","doi":"https://doi.org/10.1145/3742886.3756724"},"language":"en","primary_location":{"id":"doi:10.1145/3742886.3756724","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3742886.3756724","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Adjunct Proceedings of the 25th ACM International Conference on Intelligent Virtual Agents","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5094034370","display_name":"Sergio Esteban-Romero","orcid":"https://orcid.org/0009-0008-6336-7877"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Sergio Esteban-Romero","raw_affiliation_strings":["Universidad Polit\u00e9cnica de Madrid, Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0009-0008-6336-7877","affiliations":[{"raw_affiliation_string":"Universidad Polit\u00e9cnica de Madrid, Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069086595","display_name":"Cristina Luna-Jim\u00e9nez","orcid":"https://orcid.org/0000-0001-5369-856X"},"institutions":[{"id":"https://openalex.org/I179225836","display_name":"University of Augsburg","ror":"https://ror.org/03p14d497","country_code":"DE","type":"education","lineage":["https://openalex.org/I179225836"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Cristina Luna-Jim\u00e9nez","raw_affiliation_strings":["Chair for Human-Centered Artificial Intelligence, Augsburg University, Augsburg, Augsburg, Germany"],"raw_orcid":"https://orcid.org/0000-0001-5369-856X","affiliations":[{"raw_affiliation_string":"Chair for Human-Centered Artificial Intelligence, Augsburg University, Augsburg, Augsburg, Germany","institution_ids":["https://openalex.org/I179225836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058484271","display_name":"Manuel Gil-Mart\u00edn","orcid":"https://orcid.org/0000-0002-4285-6224"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Manuel Gil-Mart\u00edn","raw_affiliation_strings":["Universidad Polit\u00e9cnica de Madrid, Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0002-4285-6224","affiliations":[{"raw_affiliation_string":"Universidad Polit\u00e9cnica de Madrid, Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032875587","display_name":"Fernando Fern\u00e1ndez-Mart\u00ednez","orcid":"https://orcid.org/0000-0003-3877-0089"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Fernando Fern\u00e1ndez-Mart\u00ednez","raw_affiliation_strings":["Electronics Engineering Departament, Universidad Polit\u00e9cnica de Madrid, Madrid, Madrid, Spain"],"raw_orcid":"https://orcid.org/0000-0003-3877-0089","affiliations":[{"raw_affiliation_string":"Electronics Engineering Departament, Universidad Polit\u00e9cnica de Madrid, Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056684559","display_name":"Elisabeth Andr\u00e9","orcid":"https://orcid.org/0000-0002-2367-162X"},"institutions":[{"id":"https://openalex.org/I179225836","display_name":"University of Augsburg","ror":"https://ror.org/03p14d497","country_code":"DE","type":"education","lineage":["https://openalex.org/I179225836"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Elisabeth Andre","raw_affiliation_strings":["University of Augsburg, Augsburg, Germany"],"raw_orcid":"https://orcid.org/0000-0002-2367-162X","affiliations":[{"raw_affiliation_string":"University of Augsburg, Augsburg, Germany","institution_ids":["https://openalex.org/I179225836"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5094034370"],"corresponding_institution_ids":["https://openalex.org/I88060688"],"apc_list":null,"apc_paid":null,"fwci":1.5238,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.85422301,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sign","display_name":"Sign (mathematics)","score":0.3312999904155731},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.32899999618530273},{"id":"https://openalex.org/keywords/sign-language","display_name":"Sign language","score":0.328000009059906},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3253999948501587},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.3043999969959259}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.602400004863739},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6000999808311462},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.44519999623298645},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.37040001153945923},{"id":"https://openalex.org/C139676723","wikidata":"https://www.wikidata.org/wiki/Q1193832","display_name":"Sign (mathematics)","level":2,"score":0.3312999904155731},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C522192633","wikidata":"https://www.wikidata.org/wiki/Q34228","display_name":"Sign language","level":2,"score":0.328000009059906},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3253999948501587},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32199999690055847},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.2644999921321869}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3742886.3756724","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3742886.3756724","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Adjunct Proceedings of the 25th ACM International Conference on Intelligent Virtual Agents","raw_type":"proceedings-article"},{"id":"pmh:oai:oa.upm.es:91247","is_oa":false,"landing_page_url":"https://oa.upm.es/91247/","pdf_url":null,"source":{"id":"https://openalex.org/S4377196323","display_name":"UPM Digital Archive (Technical University of Madrid)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I88060688","host_organization_name":"Universidad Polit\u00e9cnica de Madrid","host_organization_lineage":["https://openalex.org/I88060688"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"IVA Adjunct '25: Adjunct Proceedings of the 25th ACM International Conference on Intelligent Virtual Agents | 25th ACM International Conference on Intelligent Virtual Agents | 16/09/2025-19/09/2025 | Berl\u00edn, Alemania","raw_type":"info:eu-repo/semantics/acceptedVersion"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1267305276","display_name":null,"funder_award_id":"101071191 \u2014 HORIZON-EIC-2021-PATHFINDERCHALLENGES-01","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2972662547","https://openalex.org/W2998994232","https://openalex.org/W3009828227","https://openalex.org/W4212986483","https://openalex.org/W4285012305","https://openalex.org/W4387424008","https://openalex.org/W4390873469","https://openalex.org/W4392904699","https://openalex.org/W4402773331","https://openalex.org/W4403713273","https://openalex.org/W4403713326","https://openalex.org/W4415065007","https://openalex.org/W6964326745"],"related_works":[],"abstract_inverted_index":{"Sign":[0,33],"languages":[1],"are":[2,80],"the":[3,12,29,56,68,76,83,107,110,116],"primary":[4],"means":[5],"of":[6,14,31,60,71,94,101,118],"communication":[7],"for":[8,126],"deaf":[9],"communities,":[10],"but":[11],"development":[13],"effective":[15],"automatic":[16],"recognition":[17,77],"systems":[18],"remains":[19],"a":[20,38,43,90,97],"significant":[21],"challenge.":[22],"In":[23],"this":[24],"work,":[25],"we":[26],"focus":[27],"on":[28,82],"task":[30],"Isolated":[32],"Language":[34,45],"Recognition":[35],"(ISLR)":[36],"using":[37],"multimodal":[39,119],"approach":[40],"grounded":[41],"in":[42,121],"Large":[44],"Model":[46],"(LLM)":[47],"architecture.":[48],"We":[49],"merge":[50],"modalities,":[51],"including":[52],"visual":[53,73],"characteristics":[54],"into":[55,109],"linguistic":[57],"representation":[58],"space":[59],"LLMs,":[61],"and":[62,96,123],"perform":[63],"ablation":[64],"studies":[65],"to":[66,75],"evaluate":[67],"individual":[69],"contributions":[70],"each":[72],"modality":[74],"performance.":[78],"Experiments":[79],"conducted":[81],"AVASAG100":[84],"dataset,":[85],"where":[86],"our":[87],"method":[88],"achieves":[89],"weighted":[91],"F1-score":[92,99],"(W-F1)":[93],"70.36\u00b13.00":[95],"macro":[98],"(MF1)":[100],"62.34\u00b13.18":[102],"projecting":[103],"landmarks":[104],"extracted":[105],"from":[106],"pose":[108],"LLM\u2019s":[111],"emebdding-space.":[112],"These":[113],"results":[114],"underscore":[115],"value":[117],"integration":[120],"ISLR":[122],"provide":[124],"guidelines":[125],"future":[127],"research":[128],"directions.":[129]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
