{"id":"https://openalex.org/W4403446082","doi":"https://doi.org/10.1109/tmc.2024.3481443","title":"Img2Acoustic: A Cross-Modal Gesture Recognition Method Based on Few-Shot Learning","display_name":"Img2Acoustic: A Cross-Modal Gesture Recognition Method Based on Few-Shot Learning","publication_year":2024,"publication_date":"2024-10-16","ids":{"openalex":"https://openalex.org/W4403446082","doi":"https://doi.org/10.1109/tmc.2024.3481443"},"language":"en","primary_location":{"id":"doi:10.1109/tmc.2024.3481443","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmc.2024.3481443","pdf_url":null,"source":{"id":"https://openalex.org/S69141925","display_name":"IEEE Transactions on Mobile Computing","issn_l":"1536-1233","issn":["1536-1233","1558-0660","2161-9875"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Mobile Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025486492","display_name":"Yongpan Zou","orcid":"https://orcid.org/0000-0002-4314-6259"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yongpan Zou","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China","College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0002-4314-6259","affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000165691","display_name":"Jianhao Weng","orcid":null},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianhao Weng","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China","College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wenting Kuang","orcid":null},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenting Kuang","raw_affiliation_strings":["College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China","College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"College of Computer Science and Software Engineering, Shenzhen University, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115594043","display_name":"Yang Jiao","orcid":"https://orcid.org/0000-0001-9468-1000"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Jiao","raw_affiliation_strings":["Faculty of Computer Science and Control Engineering, Shenzhen University of Advanced Technology, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science and Control Engineering, Shenzhen University of Advanced Technology, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380","https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035919267","display_name":"Victor C. M. Leung","orcid":"https://orcid.org/0000-0003-3529-2640"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I4388482657","display_name":"Shenzhen MSU-BIT University","ror":"https://ror.org/02q963474","country_code":null,"type":"education","lineage":["https://openalex.org/I4388482657"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Victor C. M. Leung","raw_affiliation_strings":["Artificial Intelligence Research Institute, Shenzhen MSU-BIT University, Shenzhen, China","Artificial Intelligence Research Institute, Shenzhen MSU-BIT University, Shenzhen, Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0003-3529-2640","affiliations":[{"raw_affiliation_string":"Artificial Intelligence Research Institute, Shenzhen MSU-BIT University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I4388482657"]},{"raw_affiliation_string":"Artificial Intelligence Research Institute, Shenzhen MSU-BIT University, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I180726961","https://openalex.org/I4388482657"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001188748","display_name":"Kaishun Wu","orcid":"https://orcid.org/0000-0003-2216-0737"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Kaishun Wu","raw_affiliation_strings":["Information Hub, Hong Kong University of Science and Technology, Guangzhou, China","Information Hub, Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0003-2216-0737","affiliations":[{"raw_affiliation_string":"Information Hub, Hong Kong University of Science and Technology, Guangzhou, China","institution_ids":["https://openalex.org/I200769079"]},{"raw_affiliation_string":"Information Hub, Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5025486492"],"corresponding_institution_ids":["https://openalex.org/I180726961"],"apc_list":null,"apc_paid":null,"fwci":1.0297,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.77549628,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"24","issue":"3","first_page":"1496","last_page":"1512"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11285","display_name":"Hearing Impairment and Communication","score":0.963100016117096,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9577000141143799,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8284198641777039},{"id":"https://openalex.org/keywords/gesture","display_name":"Gesture","score":0.6948715448379517},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6756693124771118},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6450141668319702},{"id":"https://openalex.org/keywords/gesture-recognition","display_name":"Gesture recognition","score":0.5550737380981445},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5216332077980042},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.44968336820602417},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4259960353374481},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.36600688099861145}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8284198641777039},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.6948715448379517},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6756693124771118},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6450141668319702},{"id":"https://openalex.org/C159437735","wikidata":"https://www.wikidata.org/wiki/Q1519524","display_name":"Gesture recognition","level":3,"score":0.5550737380981445},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5216332077980042},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.44968336820602417},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4259960353374481},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36600688099861145},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tmc.2024.3481443","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmc.2024.3481443","pdf_url":null,"source":{"id":"https://openalex.org/S69141925","display_name":"IEEE Transactions on Mobile Computing","issn_l":"1536-1233","issn":["1536-1233","1558-0660","2161-9875"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Mobile Computing","raw_type":"journal-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-149324","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-149324","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W1977574166","https://openalex.org/W2131425694","https://openalex.org/W2138136906","https://openalex.org/W2153200718","https://openalex.org/W2166742463","https://openalex.org/W2194321275","https://openalex.org/W2295107390","https://openalex.org/W2394945029","https://openalex.org/W2508345357","https://openalex.org/W2525860450","https://openalex.org/W2565639579","https://openalex.org/W2734358244","https://openalex.org/W2789860971","https://openalex.org/W2796208452","https://openalex.org/W2820122854","https://openalex.org/W2917310405","https://openalex.org/W2935290040","https://openalex.org/W2979689312","https://openalex.org/W3006378440","https://openalex.org/W3009081299","https://openalex.org/W3034942609","https://openalex.org/W3047212447","https://openalex.org/W3083249871","https://openalex.org/W3083323811","https://openalex.org/W3093489396","https://openalex.org/W3103709883","https://openalex.org/W3108244215","https://openalex.org/W3108975329","https://openalex.org/W3110608229","https://openalex.org/W3161004920","https://openalex.org/W3175117715","https://openalex.org/W3189329097","https://openalex.org/W3189656202","https://openalex.org/W3196221233","https://openalex.org/W3200883581","https://openalex.org/W3214233127","https://openalex.org/W4200329580","https://openalex.org/W4200546015","https://openalex.org/W4226267130","https://openalex.org/W4289752563","https://openalex.org/W4312883906","https://openalex.org/W4317927921","https://openalex.org/W4361761232","https://openalex.org/W4361771086","https://openalex.org/W4386065510","https://openalex.org/W4387087582","https://openalex.org/W6717697761","https://openalex.org/W6736057607","https://openalex.org/W6764592398","https://openalex.org/W6772329248","https://openalex.org/W6784924269"],"related_works":["https://openalex.org/W2066003895","https://openalex.org/W2902873204","https://openalex.org/W2185750513","https://openalex.org/W2010878661","https://openalex.org/W3147379364","https://openalex.org/W2026258298","https://openalex.org/W3204639664","https://openalex.org/W2970836791","https://openalex.org/W2805039731","https://openalex.org/W2989699735"],"abstract_inverted_index":{"Acoustic-based":[0],"human":[1],"gesture":[2,129],"recognition":[3],"(HGR)":[4],"offers":[5],"diverse":[6],"applications":[7],"due":[8],"to":[9,39,95,127,155],"the":[10,27,41,61,161],"ubiquity":[11],"of":[12,122],"sensors":[13],"and":[14,31,66,112,139,152],"touch-free":[15],"interaction.":[16],"However,":[17],"existing":[18,156],"machine":[19],"learning":[20],"approaches":[21,157],"require":[22],"substantial":[23],"training":[24,45,165],"data,":[25],"making":[26],"process":[28],"time-consuming,":[29],"costly,":[30],"labor-intensive.":[32],"Recent":[33],"studies":[34],"have":[35],"explored":[36],"cross-modal":[37,80],"methods":[38],"reduce":[40],"need":[42,162],"for":[43,163],"large":[44],"datasets":[46,56,91,126],"in":[47],"behavior":[48],"recognition,":[49],"but":[50,158],"they":[51],"typically":[52],"rely":[53],"on":[54,88,136],"open-source":[55,89],"that":[57,84,144],"closely":[58],"align":[59],"with":[60],"target":[62],"domain,":[63],"limiting":[64],"flexibility":[65],"complicating":[67],"data":[68,166],"collection.":[69,167],"In":[70],"this":[71],"paper,":[72],"we":[73],"propose":[74],"<inline-formula><tex-math":[75,133],"notation=\"LaTeX\">${\\sf":[76,134],"Img2Acoustic}$</tex-math></inline-formula>,":[77],"a":[78,107,113],"novel":[79],"acoustic-based":[81],"HGR":[82],"approach":[83],"leverages":[85],"models":[86],"trained":[87],"image":[90,125],"(i.e.,":[92],"EMNIST,":[93],"Omniglot)":[94],"effectively":[96],"recognize":[97],"custom":[98],"gestures":[99],"detected":[100],"via":[101],"acoustic":[102,128],"signals.":[103],"Our":[104],"model":[105],"incorporates":[106],"task-aware":[108,114],"attention":[109],"layer":[110,117],"(TAAL)":[111],"local":[115],"matching":[116],"(TALML),":[118],"enabling":[119],"seamless":[120],"transfer":[121],"knowledge":[123],"from":[124],"recognition.":[130],"We":[131],"implement":[132],"Img2Acoustic}$</tex-math></inline-formula>":[135],"commercial":[137],"devices":[138],"conduct":[140],"comprehensive":[141],"evaluations,":[142],"demonstrating":[143],"our":[145],"method":[146],"not":[147],"only":[148],"delivers":[149],"superior":[150],"accuracy":[151],"robustness":[153],"compared":[154],"also":[159],"eliminates":[160],"extensive":[164]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2024-10-17T00:00:00"}
