{"id":"https://openalex.org/W4316660066","doi":"https://doi.org/10.1109/vcip56404.2022.10008833","title":"MAiVAR: Multimodal Audio-Image and Video Action Recognizer","display_name":"MAiVAR: Multimodal Audio-Image and Video Action Recognizer","publication_year":2022,"publication_date":"2022-12-13","ids":{"openalex":"https://openalex.org/W4316660066","doi":"https://doi.org/10.1109/vcip56404.2022.10008833"},"language":"en","primary_location":{"id":"doi:10.1109/vcip56404.2022.10008833","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/vcip56404.2022.10008833","pdf_url":null,"source":{"id":"https://openalex.org/S4363608486","display_name":"2022 IEEE International Conference on Visual Communications and Image Processing (VCIP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Visual Communications and Image Processing (VCIP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036110830","display_name":"Muhammad Bilal Shaikh","orcid":"https://orcid.org/0000-0001-9042-5018"},"institutions":[{"id":"https://openalex.org/I12079687","display_name":"Edith Cowan University","ror":"https://ror.org/05jhnwe22","country_code":"AU","type":"education","lineage":["https://openalex.org/I12079687"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Muhammad Bilal Shaikh","raw_affiliation_strings":["Edith Cowan University,Perth,WA,Australia,6027"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Edith Cowan University,Perth,WA,Australia,6027","institution_ids":["https://openalex.org/I12079687"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056138775","display_name":"Douglas Chai","orcid":"https://orcid.org/0000-0002-9004-7608"},"institutions":[{"id":"https://openalex.org/I12079687","display_name":"Edith Cowan University","ror":"https://ror.org/05jhnwe22","country_code":"AU","type":"education","lineage":["https://openalex.org/I12079687"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Douglas Chai","raw_affiliation_strings":["Edith Cowan University,Perth,WA,Australia,6027"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Edith Cowan University,Perth,WA,Australia,6027","institution_ids":["https://openalex.org/I12079687"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034900636","display_name":"Syed Mohammed Shamsul Islam","orcid":"https://orcid.org/0000-0002-3200-2903"},"institutions":[{"id":"https://openalex.org/I12079687","display_name":"Edith Cowan University","ror":"https://ror.org/05jhnwe22","country_code":"AU","type":"education","lineage":["https://openalex.org/I12079687"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Syed Mohammed Shamsul Islam","raw_affiliation_strings":["Edith Cowan University,Perth,WA,Australia,6027"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Edith Cowan University,Perth,WA,Australia,6027","institution_ids":["https://openalex.org/I12079687"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069697936","display_name":"Naveed Akhtar","orcid":"https://orcid.org/0000-0003-3406-673X"},"institutions":[{"id":"https://openalex.org/I177877127","display_name":"The University of Western Australia","ror":"https://ror.org/047272k79","country_code":"AU","type":"education","lineage":["https://openalex.org/I177877127"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Naveed Akhtar","raw_affiliation_strings":["The University of Western Australia,Perth,WA,Australia,6009"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Western Australia,Perth,WA,Australia,6009","institution_ids":["https://openalex.org/I177877127"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2359,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.60617459,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"28","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11398","display_name":"Hand Gesture Recognition Systems","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8376198410987854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6018227934837341},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5675764083862305},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5508072376251221},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.504698634147644},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.46356260776519775},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4404889941215515},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4397222101688385},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.43694087862968445},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4307023882865906},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.360274076461792}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8376198410987854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6018227934837341},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5675764083862305},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5508072376251221},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.504698634147644},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.46356260776519775},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4404889941215515},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4397222101688385},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.43694087862968445},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4307023882865906},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.360274076461792},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/vcip56404.2022.10008833","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/vcip56404.2022.10008833","pdf_url":null,"source":{"id":"https://openalex.org/S4363608486","display_name":"2022 IEEE International Conference on Visual Communications and Image Processing (VCIP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Visual Communications and Image Processing (VCIP)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/320adc19-a686-4612-ac40-2216742198bc","is_oa":false,"landing_page_url":"http://www.scopus.com/inward/record.url?scp=85147247031&partnerID=8YFLogxK","pdf_url":null,"source":{"id":"https://openalex.org/S4306402523","display_name":"UWA Profiles and Research Repository (University of Western Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I177877127","host_organization_name":"The University of Western Australia","host_organization_lineage":["https://openalex.org/I177877127"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Shaikh, M B, Chai, D, Islam, S M S & Akhtar, N 2022, MAiVAR : Multimodal Audio-Image and Video Action Recognizer. in 2022 IEEE International Conference on Visual Communications and Image Processing, VCIP 2022. 2022 IEEE International Conference on Visual Communications and Image Processing, VCIP 2022, IEEE, Institute of Electrical and Electronics Engineers, USA, 2022 IEEE International Conference on Visual Communications and Image Processing, VCIP 2022, Suzhou, China, 13/12/22. https://doi.org/10.1109/VCIP56404.2022.10008833","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W24089286","https://openalex.org/W639708223","https://openalex.org/W1522734439","https://openalex.org/W2032337854","https://openalex.org/W2117539524","https://openalex.org/W2191779130","https://openalex.org/W2194775991","https://openalex.org/W2465313502","https://openalex.org/W2507009361","https://openalex.org/W2570915410","https://openalex.org/W2608988379","https://openalex.org/W2770804203","https://openalex.org/W2955425717","https://openalex.org/W2961193895","https://openalex.org/W2963370182","https://openalex.org/W2963457877","https://openalex.org/W2963524571","https://openalex.org/W2964350391","https://openalex.org/W2968553732","https://openalex.org/W2971680695","https://openalex.org/W2990152177","https://openalex.org/W2990503944","https://openalex.org/W3005865003","https://openalex.org/W3025948831","https://openalex.org/W3034658206","https://openalex.org/W3034768625","https://openalex.org/W3168640669","https://openalex.org/W3174760001","https://openalex.org/W3175419009","https://openalex.org/W3214007456","https://openalex.org/W4214777631","https://openalex.org/W4220894980","https://openalex.org/W4295312788","https://openalex.org/W6600983433","https://openalex.org/W6620707391","https://openalex.org/W6762718338","https://openalex.org/W6766978945"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W203959209","https://openalex.org/W2110287964","https://openalex.org/W2167701463","https://openalex.org/W4307407935"],"abstract_inverted_index":{"Currently,":[0],"action":[1,28,69,98],"recognition":[2,29,70,99],"is":[3],"predominately":[4],"performed":[5],"on":[6,95],"video":[7,56,62,83],"data":[8],"as":[9,89],"processed":[10],"by":[11,30],"CNNs.":[12],"We":[13],"investigate":[14],"if":[15],"the":[16],"representation":[17,84],"process":[18],"of":[19,35,77],"CNN":[20],"s":[21],"can":[22],"also":[23],"be":[24],"leveraged":[25],"for":[26,61],"multimodal":[27],"incorporating":[31],"image-based":[32],"audio":[33,64,78],"representations":[34,76],"actions":[36],"in":[37],"a":[38,52,96],"task.":[39],"To":[40],"this":[41],"end,":[42],"we":[43],"propose":[44],"Multimodal":[45],"Audio-Image":[46],"and":[47,63,79],"Video":[48],"Action":[49],"Recognizer":[50],"(MAiVAR),":[51],"CNN-based":[53],"audio-image":[54],"to":[55,66,85,91],"fusion":[57],"model":[58],"that":[59],"accounts":[60],"modalities":[65,93],"achieve":[67,86],"superior":[68],"performance.":[71],"MAiVAR":[72],"extracts":[73],"meaningful":[74],"image":[75],"fuses":[80],"it":[81],"with":[82],"better":[87],"performance":[88],"compared":[90],"both":[92],"individually":[94],"large-scale":[97],"dataset.":[100]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2026-07-02T09:51:11.867554","created_date":"2025-10-10T00:00:00"}
