{"id":"https://openalex.org/W4404132563","doi":"https://doi.org/10.1109/tetci.2024.3485624","title":"Object-Aware Image Augmentation for Audio-Visual Zero-Shot Learning","display_name":"Object-Aware Image Augmentation for Audio-Visual Zero-Shot Learning","publication_year":2024,"publication_date":"2024-11-07","ids":{"openalex":"https://openalex.org/W4404132563","doi":"https://doi.org/10.1109/tetci.2024.3485624"},"language":"en","primary_location":{"id":"doi:10.1109/tetci.2024.3485624","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2024.3485624","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007705812","display_name":"Yujie Dong","orcid":"https://orcid.org/0009-0000-5950-9352"},"institutions":[{"id":"https://openalex.org/I178232147","display_name":"Guizhou University","ror":"https://ror.org/02wmsc916","country_code":"CN","type":"education","lineage":["https://openalex.org/I178232147"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yujie Dong","raw_affiliation_strings":["State Key Laboratory of Public Big Data, College of Computer Science and Technology, Guizhou University, Guiyang, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Public Big Data, College of Computer Science and Technology, Guizhou University, Guiyang, China","institution_ids":["https://openalex.org/I178232147"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071943020","display_name":"Shiming Chen","orcid":"https://orcid.org/0000-0001-9633-3392"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Shiming Chen","raw_affiliation_strings":["Mohamed bin Zayed University of AI, Abu Dhabi, UAE"],"affiliations":[{"raw_affiliation_string":"Mohamed bin Zayed University of AI, Abu Dhabi, UAE","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113424381","display_name":"B.Y. Duan","orcid":"https://orcid.org/0009-0000-3048-4796"},"institutions":[{"id":"https://openalex.org/I178232147","display_name":"Guizhou University","ror":"https://ror.org/02wmsc916","country_code":"CN","type":"education","lineage":["https://openalex.org/I178232147"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bowen Duan","raw_affiliation_strings":["State Key Laboratory of Public Big Data, College of Computer Science and Technology, Guizhou University, Guiyang, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Public Big Data, College of Computer Science and Technology, Guizhou University, Guiyang, China","institution_ids":["https://openalex.org/I178232147"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069969191","display_name":"Weiping Ding","orcid":"https://orcid.org/0000-0002-3180-7347"},"institutions":[{"id":"https://openalex.org/I199305430","display_name":"Nantong University","ror":"https://ror.org/02afcvw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I199305430"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiping Ding","raw_affiliation_strings":["School of Artificial Intelligence and Computer Science, Nantong University, Nantong, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Computer Science, Nantong University, Nantong, China","institution_ids":["https://openalex.org/I199305430"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101688864","display_name":"Yisong Wang","orcid":"https://orcid.org/0000-0003-2126-7006"},"institutions":[{"id":"https://openalex.org/I178232147","display_name":"Guizhou University","ror":"https://ror.org/02wmsc916","country_code":"CN","type":"education","lineage":["https://openalex.org/I178232147"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yisong Wang","raw_affiliation_strings":["State Key Laboratory of Public Big Data, College of Computer Science and Technology, Guizhou University, Guiyang, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Public Big Data, College of Computer Science and Technology, Guizhou University, Guiyang, China","institution_ids":["https://openalex.org/I178232147"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057095711","display_name":"Xinge You","orcid":"https://orcid.org/0009-0006-1450-9504"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinge You","raw_affiliation_strings":["School of Electronic Information and Communication, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Communication, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5007705812"],"corresponding_institution_ids":["https://openalex.org/I178232147"],"apc_list":null,"apc_paid":null,"fwci":1.0122,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.80636713,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"9","issue":"6","first_page":"4106","last_page":"4118"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9740999937057495,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9718999862670898,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.823265552520752},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6943906545639038},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6619113683700562},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5908734798431396},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5723430514335632},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5425137281417847},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5372495651245117},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.49384742975234985},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.403237521648407},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.31273168325424194}],"concepts":[{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.823265552520752},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6943906545639038},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6619113683700562},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5908734798431396},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5723430514335632},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5425137281417847},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5372495651245117},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.49384742975234985},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.403237521648407},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.31273168325424194},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tetci.2024.3485624","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetci.2024.3485624","pdf_url":null,"source":{"id":"https://openalex.org/S4210210251","display_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","issn_l":"2471-285X","issn":["2471-285X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computational Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2188732198","display_name":null,"funder_award_id":"62376066","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6401699566","display_name":null,"funder_award_id":"61976065","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W2044913453","https://openalex.org/W2128532956","https://openalex.org/W2171061940","https://openalex.org/W2194775991","https://openalex.org/W2511428026","https://openalex.org/W2596142952","https://openalex.org/W2604808168","https://openalex.org/W2724492314","https://openalex.org/W2887567284","https://openalex.org/W2890671638","https://openalex.org/W2962865004","https://openalex.org/W2962960500","https://openalex.org/W2963218389","https://openalex.org/W2963499153","https://openalex.org/W2963538198","https://openalex.org/W2963545832","https://openalex.org/W2965373098","https://openalex.org/W2998351764","https://openalex.org/W3000538487","https://openalex.org/W3025520547","https://openalex.org/W3035356612","https://openalex.org/W3119136678","https://openalex.org/W3124902415","https://openalex.org/W3138521398","https://openalex.org/W3141876413","https://openalex.org/W3175825020","https://openalex.org/W3203055845","https://openalex.org/W4200297686","https://openalex.org/W4200633401","https://openalex.org/W4205964412","https://openalex.org/W4213299273","https://openalex.org/W4214746887","https://openalex.org/W4226155060","https://openalex.org/W4229044990","https://openalex.org/W4283817440","https://openalex.org/W4285787895","https://openalex.org/W4312315944","https://openalex.org/W4312584871","https://openalex.org/W4312653797","https://openalex.org/W4312763854","https://openalex.org/W4312791158","https://openalex.org/W4313123992","https://openalex.org/W4315606144","https://openalex.org/W4317384342","https://openalex.org/W4327928270","https://openalex.org/W4364322755","https://openalex.org/W4366988472","https://openalex.org/W4380032301","https://openalex.org/W4382203358","https://openalex.org/W4386025726","https://openalex.org/W4390872378","https://openalex.org/W4390874107","https://openalex.org/W4394896988","https://openalex.org/W4399665752","https://openalex.org/W4400936025","https://openalex.org/W4402111610","https://openalex.org/W4402702970"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2271369634","https://openalex.org/W2366718574","https://openalex.org/W3147472394","https://openalex.org/W2047100085"],"abstract_inverted_index":{"Audio-visual":[0],"zero-shot":[1],"learning":[2,134],"(ZSL)":[3],"leverages":[4],"both":[5],"video":[6,17],"and":[7,86,97,113,120,131,138,182],"audio":[8],"information":[9,51],"for":[10,75,101,128,153],"model":[11,130,164],"training,":[12],"aiming":[13],"to":[14,31,52,93,111,149],"classify":[15],"new":[16],"categories":[18],"that":[19,162],"were":[20],"not":[21],"seen":[22],"during":[23,174],"the":[24,40,105,115,129,133,163,180],"training.":[25],"However,":[26],"existing":[27,158],"methods":[28,47],"often":[29],"failed":[30],"learn":[32],"robust":[33],"multi-modal":[34,140],"feature":[35,95],"representations":[36,96],"because":[37],"they":[38],"overlook":[39],"importance":[41],"of":[42,117,135,184],"object-aware":[43,121],"images.":[44],"Moreover,":[45],"these":[46,64],"require":[48],"complete":[49],"modal":[50,159,170],"operate":[53,166],"effectively,":[54],"which":[55],"limits":[56],"their":[57],"performance":[58],"in":[59],"resource-constrained":[60],"environments.":[61],"To":[62],"address":[63],"issues,":[65],"this":[66],"paper":[67],"proposes":[68],"an":[69],"Object-Aware":[70],"Image":[71],"Augmentation":[72,83],"Network":[73],"(OAIA)":[74],"audio-visual":[76],"ZSL.":[77],"OAIA":[78],"introduces":[79],"a":[80,87,124,146],"Cross-Modal":[81],"Feature":[82],"(CMFA)":[84],"subnet":[85,92,107,144],"Missing":[88],"Modality":[89],"Generation":[90],"(MMG)":[91],"enhance":[94,114],"generate":[98,150],"virtual":[99,151],"features":[100,116,152],"missing":[102,154],"modalities.":[103],"Specifically,":[104],"CMFA":[106],"uses":[108],"attention":[109],"mechanisms":[110],"integrate":[112],"video,":[118],"audio,":[119],"images,":[122],"providing":[123],"richer":[125],"training":[126],"signal":[127],"promoting":[132],"more":[136],"diverse":[137],"discriminative":[139],"representations.":[141],"The":[142],"MMG":[143],"employs":[145],"multi-layer":[147],"perceptron":[148],"modalities":[155],"based":[156],"on":[157],"information,":[160],"ensuring":[161],"can":[165],"effectively":[167],"even":[168],"when":[169],"data":[171],"is":[172],"incomplete":[173],"testing.":[175],"Extensive":[176],"experiments":[177],"have":[178],"demonstrated":[179],"effectiveness":[181],"superiority":[183],"our":[185],"proposed":[186],"method.":[187]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
