{"id":"https://openalex.org/W4409553589","doi":"https://doi.org/10.1007/s10462-025-11228-4","title":"Learning semantic consistency for audio-visual zero-shot learning","display_name":"Learning semantic consistency for audio-visual zero-shot learning","publication_year":2025,"publication_date":"2025-04-17","ids":{"openalex":"https://openalex.org/W4409553589","doi":"https://doi.org/10.1007/s10462-025-11228-4"},"language":"en","primary_location":{"id":"doi:10.1007/s10462-025-11228-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10462-025-11228-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10462-025-11228-4.pdf","source":{"id":"https://openalex.org/S122814990","display_name":"Artificial Intelligence Review","issn_l":"0269-2821","issn":["0269-2821","1573-7462"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence Review","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10462-025-11228-4.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071782289","display_name":"Xiaoyong Li","orcid":"https://orcid.org/0000-0002-6580-1648"},"institutions":[{"id":"https://openalex.org/I178232147","display_name":"Guizhou University","ror":"https://ror.org/02wmsc916","country_code":"CN","type":"education","lineage":["https://openalex.org/I178232147"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyong Li","raw_affiliation_strings":["State Key Laboratory of Public Big Data, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Public Big Data, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China","institution_ids":["https://openalex.org/I178232147"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100459985","display_name":"Jing Yang","orcid":"https://orcid.org/0000-0002-5973-248X"},"institutions":[{"id":"https://openalex.org/I178232147","display_name":"Guizhou University","ror":"https://ror.org/02wmsc916","country_code":"CN","type":"education","lineage":["https://openalex.org/I178232147"]},{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jing Yang","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Dongchuan Road, Minhang District, Shanghai, 201100, Shanghai, China","State Key Laboratory of Public Big Data, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Dongchuan Road, Minhang District, Shanghai, 201100, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"State Key Laboratory of Public Big Data, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China","institution_ids":["https://openalex.org/I178232147"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100722816","display_name":"Yuling Chen","orcid":"https://orcid.org/0000-0002-8399-2070"},"institutions":[{"id":"https://openalex.org/I178232147","display_name":"Guizhou University","ror":"https://ror.org/02wmsc916","country_code":"CN","type":"education","lineage":["https://openalex.org/I178232147"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuling Chen","raw_affiliation_strings":["State Key Laboratory of Public Big Data, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Public Big Data, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China","institution_ids":["https://openalex.org/I178232147"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100441494","display_name":"Wei Zhang","orcid":"https://orcid.org/0000-0001-7390-7613"},"institutions":[{"id":"https://openalex.org/I178232147","display_name":"Guizhou University","ror":"https://ror.org/02wmsc916","country_code":"CN","type":"education","lineage":["https://openalex.org/I178232147"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Zhang","raw_affiliation_strings":["School of Management, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Management, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China","institution_ids":["https://openalex.org/I178232147"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005134326","display_name":"Xiaoli Ruan","orcid":"https://orcid.org/0000-0001-6031-2282"},"institutions":[{"id":"https://openalex.org/I178232147","display_name":"Guizhou University","ror":"https://ror.org/02wmsc916","country_code":"CN","type":"education","lineage":["https://openalex.org/I178232147"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoli Ruan","raw_affiliation_strings":["State Key Laboratory of Public Big Data, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Public Big Data, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China","institution_ids":["https://openalex.org/I178232147"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037571401","display_name":"Chengjiang Li","orcid":"https://orcid.org/0000-0002-1864-2023"},"institutions":[{"id":"https://openalex.org/I178232147","display_name":"Guizhou University","ror":"https://ror.org/02wmsc916","country_code":"CN","type":"education","lineage":["https://openalex.org/I178232147"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengjiang Li","raw_affiliation_strings":["School of Management, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Management, Guizhou University, Jiaxiu South Road, Huaxi District, Guiyang, 550025, Guizhou, China","institution_ids":["https://openalex.org/I178232147"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025790251","display_name":"Zhidong Su","orcid":"https://orcid.org/0000-0001-6592-0666"},"institutions":[{"id":"https://openalex.org/I115475287","display_name":"Oklahoma State University","ror":"https://ror.org/01g9vbr38","country_code":"US","type":"education","lineage":["https://openalex.org/I115475287"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhidong Su","raw_affiliation_strings":["Electrical and Computer Engineering, Oklahoma State University, 107 Whitehurst, Stillwater, 74078-1015, OK, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, Oklahoma State University, 107 Whitehurst, Stillwater, 74078-1015, OK, USA","institution_ids":["https://openalex.org/I115475287"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100459985"],"corresponding_institution_ids":["https://openalex.org/I178232147","https://openalex.org/I183067930"],"apc_list":{"value":2490,"currency":"EUR","value_usd":3090},"apc_paid":{"value":2490,"currency":"EUR","value_usd":3090},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07390755,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"58","issue":"7","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9757000207901001,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8125011920928955},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.7240880727767944},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.626473069190979},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5563892126083374},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.4793761074542999},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4370185434818268},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.35413986444473267},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.30454036593437195}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8125011920928955},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.7240880727767944},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.626473069190979},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5563892126083374},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.4793761074542999},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4370185434818268},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35413986444473267},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.30454036593437195},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10462-025-11228-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10462-025-11228-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10462-025-11228-4.pdf","source":{"id":"https://openalex.org/S122814990","display_name":"Artificial Intelligence Review","issn_l":"0269-2821","issn":["0269-2821","1573-7462"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence Review","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10462-025-11228-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10462-025-11228-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10462-025-11228-4.pdf","source":{"id":"https://openalex.org/S122814990","display_name":"Artificial Intelligence Review","issn_l":"0269-2821","issn":["0269-2821","1573-7462"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence Review","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.4099999964237213}],"awards":[{"id":"https://openalex.org/G1631827526","display_name":null,"funder_award_id":"(62441608,62166005)","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1665839876","display_name":null,"funder_award_id":"[2023]48","funder_id":"https://openalex.org/F4320321927","funder_display_name":"Guizhou University"},{"id":"https://openalex.org/G3957560929","display_name":null,"funder_award_id":"ZKHT[2023]48-8","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5288398403","display_name":null,"funder_award_id":"62166005","funder_id":"https://openalex.org/F4320321927","funder_display_name":"Guizhou University"},{"id":"https://openalex.org/G6257038346","display_name":null,"funder_award_id":"QKHZC[2023]368","funder_id":"https://openalex.org/F4320336617","funder_display_name":"Science and Technology Program of Guizhou Province"},{"id":"https://openalex.org/G6360692046","display_name":null,"funder_award_id":"62166005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6719427925","display_name":null,"funder_award_id":"62441608","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321927","display_name":"Guizhou University","ror":"https://ror.org/02wmsc916"},{"id":"https://openalex.org/F4320336617","display_name":"Science and Technology Program of Guizhou Province","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409553589.pdf","grobid_xml":"https://content.openalex.org/works/W4409553589.grobid-xml"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W1927052826","https://openalex.org/W2044913453","https://openalex.org/W2171061940","https://openalex.org/W2526050071","https://openalex.org/W2924476266","https://openalex.org/W2962865004","https://openalex.org/W2963318411","https://openalex.org/W3015371781","https://openalex.org/W3025520547","https://openalex.org/W3034694537","https://openalex.org/W3035102141","https://openalex.org/W3035700349","https://openalex.org/W3048939150","https://openalex.org/W3099638501","https://openalex.org/W3119136678","https://openalex.org/W3170088426","https://openalex.org/W3203711169","https://openalex.org/W3204588463","https://openalex.org/W4200629525","https://openalex.org/W4214764301","https://openalex.org/W4295916008","https://openalex.org/W4312653797","https://openalex.org/W4312763854","https://openalex.org/W4313127305","https://openalex.org/W4382073604","https://openalex.org/W4385488559","https://openalex.org/W4386108345","https://openalex.org/W4390872378","https://openalex.org/W4393181080","https://openalex.org/W4394735382","https://openalex.org/W4400033239","https://openalex.org/W4400903918","https://openalex.org/W4401634040","https://openalex.org/W4402082562","https://openalex.org/W4402904133","https://openalex.org/W4402917184","https://openalex.org/W4403561453","https://openalex.org/W6600688380","https://openalex.org/W6601539397"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2271369634","https://openalex.org/W2366718574","https://openalex.org/W3147472394","https://openalex.org/W2047100085"],"abstract_inverted_index":{"Audio-visual":[0],"zero-shot":[1],"learning":[2,34],"requires":[3],"an":[4,71],"understanding":[5],"of":[6,52,101,126],"the":[7,25,38,48,53,82,98,102,106,116,122,131,156,169,173],"relationship":[8],"between":[9,41,85,118,124],"audio":[10,42,86],"and":[11,21,43,87,147,181],"visual":[12,88],"information":[13,77],"to":[14,31,74,80,96],"determine":[15],"unseen":[16],"classes.":[17],"Despite":[18],"many":[19,27],"efforts":[20],"significant":[22],"progress":[23],"in":[24],"field,":[26],"existing":[28],"methods":[29],"tend":[30],"focus":[32],"on":[33,137,162,168],"strong":[35],"representations,":[36],"neglecting":[37],"semantic":[39,83],"consistency":[40,84],"video":[44],"as":[45,47],"well":[46],"inherent":[49],"hierarchical":[50,99],"structure":[51,100],"data.":[54,89],"To":[55,129],"address":[56],"these":[57],"issues,":[58],"we":[59,69,91,134],"propose":[60],"Learning":[61],"Semantic":[62],"Consistency":[63],"for":[64],"Audio-Visual":[65],"Zero-shot":[66],"Learning.":[67],"Specifically,":[68],"employ":[70],"attention":[72],"mechanism":[73],"enhance":[75],"cross-modal":[76],"interactions,":[78],"aiming":[79],"capture":[81],"Meanwhile,":[90],"introduce":[92],"a":[93,110],"hyperbolic":[94],"space":[95],"model":[97],"data":[103,182],"itself.":[104],"Moreover,":[105],"proposed":[107,132,157],"approach":[108],"includes":[109],"novel":[111],"loss":[112],"function":[113],"that":[114,155],"considers":[115],"relationships":[117],"input":[119],"modalities,":[120],"reducing":[121],"distance":[123],"features":[125],"different":[127],"modalities.":[128],"evaluate":[130],"method,":[133],"test":[135],"it":[136],"three":[138,164],"benchmark":[139],"datasets":[140],"$$\\hbox":[141,144,148,170],"{VGGSound-GZS}{{\\textrm{L}}^{cls}}$$":[142],",":[143,146],"{UCF-GZS}{{\\textrm{L}}^{cls}}$$":[145,171],"{ActivityNet-GZS}{{\\textrm{L}}^{cls}}$$":[149],".":[150,186],"Extensive":[151],"experimental":[152],"results":[153],"show":[154],"method":[158],"achieves":[159],"state-of-the-art":[160],"performance":[161],"all":[163],"datasets.":[165],"For":[166],"example,":[167],"dataset,":[172],"harmonic":[174],"mean":[175],"is":[176],"improved":[177],"by":[178],"5.7%.":[179],"Code":[180],"available":[183],"at":[184],"https://github.com/ybyangjing/LSC-AVZSL":[185]},"counts_by_year":[],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2025-10-10T00:00:00"}
