{"id":"https://openalex.org/W4392251991","doi":"https://doi.org/10.1109/tmm.2024.3371220","title":"Noise-Tolerant Learning for Audio-Visual Action Recognition","display_name":"Noise-Tolerant Learning for Audio-Visual Action Recognition","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4392251991","doi":"https://doi.org/10.1109/tmm.2024.3371220"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3371220","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3371220","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101119626","display_name":"Haochen Han","orcid":"https://orcid.org/0000-0002-0479-8932"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haochen Han","raw_affiliation_strings":["National Engineering Lab for Big Data Analytics, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Lab for Big Data Analytics, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041083459","display_name":"Qinghua Zheng","orcid":"https://orcid.org/0000-0002-8436-4754"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinghua Zheng","raw_affiliation_strings":["National Engineering Lab for Big Data Analytics, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Lab for Big Data Analytics, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013911439","display_name":"Minnan Luo","orcid":"https://orcid.org/0000-0002-0140-7860"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minnan Luo","raw_affiliation_strings":["National Engineering Lab for Big Data Analytics, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Lab for Big Data Analytics, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048527206","display_name":"Kaiyao Miao","orcid":"https://orcid.org/0000-0001-8579-1750"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiyao Miao","raw_affiliation_strings":["Key Laboratory of Intelligent Networks and Network Security (Xi&#x0027;an Jiaotong University), Ministry of Education, Xi&#x0027;an, China","School of Cyber Science and Engineering, Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Networks and Network Security (Xi&#x0027;an Jiaotong University), Ministry of Education, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"School of Cyber Science and Engineering, Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013725152","display_name":"Feng Tian","orcid":"https://orcid.org/0000-0001-7888-0587"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Tian","raw_affiliation_strings":["National Engineering Lab for Big Data Analytics, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Lab for Big Data Analytics, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114549847","display_name":"Yan Chen","orcid":"https://orcid.org/0000-0003-4838-3779"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Chen","raw_affiliation_strings":["National Engineering Lab for Big Data Analytics, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Lab for Big Data Analytics, Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101119626"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":4.2435,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.9514125,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"26","issue":null,"first_page":"7761","last_page":"7774"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8585327863693237},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6537061333656311},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5967413783073425},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action recognition","score":0.49698522686958313},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.411480575799942},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3847994804382324},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3775798976421356},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.1464664340019226}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8585327863693237},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6537061333656311},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5967413783073425},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.49698522686958313},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.411480575799942},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3847994804382324},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3775798976421356},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.1464664340019226},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3371220","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3371220","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7099999785423279,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G2172843688","display_name":null,"funder_award_id":"62250009","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4303215675","display_name":null,"funder_award_id":"62192781","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4896784468","display_name":null,"funder_award_id":"62137002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4985986209","display_name":null,"funder_award_id":"62272374","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8740505804","display_name":null,"funder_award_id":"62202367","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320327721","display_name":"K. C. Wong Education Foundation","ror":null},{"id":"https://openalex.org/F4320330193","display_name":"Chinese Academy of Engineering","ror":"https://ror.org/00z3yke57"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W24089286","https://openalex.org/W2016053056","https://openalex.org/W2183341477","https://openalex.org/W2261706002","https://openalex.org/W2524365899","https://openalex.org/W2563717578","https://openalex.org/W2593116425","https://openalex.org/W2619697695","https://openalex.org/W2619947201","https://openalex.org/W2773514261","https://openalex.org/W2798991696","https://openalex.org/W2842511635","https://openalex.org/W2883725317","https://openalex.org/W2913950831","https://openalex.org/W2962699416","https://openalex.org/W2963115079","https://openalex.org/W2963155035","https://openalex.org/W2963509340","https://openalex.org/W2963820951","https://openalex.org/W2964011431","https://openalex.org/W2980037812","https://openalex.org/W2988966271","https://openalex.org/W2990503944","https://openalex.org/W2997312573","https://openalex.org/W2998702515","https://openalex.org/W3035333188","https://openalex.org/W3087124270","https://openalex.org/W3094550259","https://openalex.org/W3110687497","https://openalex.org/W3157190320","https://openalex.org/W3160264863","https://openalex.org/W3168316785","https://openalex.org/W3173843816","https://openalex.org/W3175300676","https://openalex.org/W3176451698","https://openalex.org/W3176463841","https://openalex.org/W3212436340","https://openalex.org/W4206516668","https://openalex.org/W4282937133","https://openalex.org/W4282937884","https://openalex.org/W4283030109","https://openalex.org/W4297808394","https://openalex.org/W4312249250","https://openalex.org/W4312601326","https://openalex.org/W4312682661","https://openalex.org/W4313014573","https://openalex.org/W4313135270","https://openalex.org/W4321488152","https://openalex.org/W6600983433","https://openalex.org/W6682962330","https://openalex.org/W6728047685","https://openalex.org/W6740005241","https://openalex.org/W6747225971","https://openalex.org/W6750523955","https://openalex.org/W6751420435","https://openalex.org/W6751647823","https://openalex.org/W6751883947","https://openalex.org/W6763430915","https://openalex.org/W6770805772","https://openalex.org/W6771630921","https://openalex.org/W6779784029","https://openalex.org/W6779997284","https://openalex.org/W6784235280","https://openalex.org/W6790706061","https://openalex.org/W6803448863","https://openalex.org/W6955071965"],"related_works":["https://openalex.org/W2084487854","https://openalex.org/W2353878298","https://openalex.org/W4205118967","https://openalex.org/W2077682749","https://openalex.org/W2298450300","https://openalex.org/W2374729771","https://openalex.org/W2963155035","https://openalex.org/W3200334421","https://openalex.org/W1576128429","https://openalex.org/W2269464716"],"abstract_inverted_index":{"Recently,":[0],"video":[1,64],"recognition":[2,37,95,284],"is":[3,142],"emerging":[4],"with":[5],"the":[6,19,24,61,72,92,131,146,150,155,164,174,188,194,199,215,221,238,243,261,279,282,288],"help":[7],"of":[8,23,41,80,122,176,245,252,270,281],"multi-modal":[9,28,54],"learning,":[10],"which":[11,230],"focuses":[12],"on":[13,45,266],"integrating":[14],"distinct":[15],"modalities":[16,52],"to":[17,106,127,144,149,186,209,226,236,242],"improve":[18],"performance":[20],"or":[21,69],"robustness":[22,280],"model.":[25],"Although":[26],"various":[27],"learning":[29,104],"methods":[30,43],"have":[31],"been":[32],"proposed":[33],"and":[34,49,83,100,115,167,286],"offer":[35],"remarkable":[36],"results,":[38],"almost":[39],"all":[40],"these":[42],"rely":[44],"high-quality":[46],"manual":[47],"annotations":[48],"assume":[50],"that":[51,125,274],"among":[53,224],"data":[55,258],"provide":[56,169],"semantically":[57],"relevant":[58],"information.":[59],"Unfortunately,":[60],"widely":[62],"used":[63,232],"datasets":[65],"are":[66,231],"usually":[67],"coarse-annotated":[68],"collected":[70],"from":[71],"Internet.":[73],"Thus,":[74],"it":[75,161],"inevitably":[76],"contains":[77],"a":[78,98,102,137,181,205,250,267,291],"portion":[79],"noisy":[81,84,113,116,165,177,195,254,271],"labels":[82,114],"correspondence.":[85,117],"To":[86,172],"address":[87],"this":[88],"challenge,":[89],"we":[90,179,202,219,248],"use":[91],"audio-visual":[93,257],"action":[94,283],"task":[96],"as":[97,233],"proxy":[99],"propose":[101,180,204],"noise-tolerant":[103,138],"framework":[105],"find":[107],"anti-interference":[108],"model":[109,147,285],"parameters":[110],"against":[111],"both":[112],"Specifically,":[118],"our":[119,275],"method":[120,276],"consists":[121],"two":[123],"phases":[124],"aim":[126],"rectify":[128],"noise":[129,183],"by":[130,158,259,290],"inherent":[132],"correlation":[133],"between":[134,190],"modalities.":[135,192],"First,":[136],"contrastive":[139,159,207],"training":[140,217],"phase":[141],"performed":[143],"make":[145],"immune":[148],"possible":[151],"noisy-labeled":[152],"data.":[153],"Despite":[154],"benefits":[156],"brought":[157],"training,":[160],"would":[162],"overfit":[163],"correspondence":[166,196,255],"thus":[168],"false":[170],"supervision.":[171],"alleviate":[173],"influence":[175],"correspondence,":[178],"cross-modal":[182],"estimation":[184],"component":[185],"adjust":[187],"consistency":[189],"different":[191],"As":[193],"existed":[197],"at":[198],"instance":[200],"level,":[201],"further":[203],"category-level":[206],"loss":[208],"reduce":[210],"its":[211],"interference.":[212],"Second,":[213],"in":[214,256],"hybrid-supervised":[216],"phase,":[218],"calculate":[220],"distance":[222],"metric":[223],"features":[225],"obtain":[227],"corrected":[228],"labels,":[229],"complementary":[234],"supervision":[235],"guide":[237],"training.":[239],"Furthermore,":[240],"due":[241],"lack":[244],"suitable":[246],"datasets,":[247],"establish":[249],"benchmark":[251],"real-world":[253],"relabeling":[260],"Kinetics":[262],"dataset.":[263],"Extensive":[264],"experiments":[265],"wide":[268],"range":[269],"levels":[272],"demonstrate":[273],"significantly":[277],"improves":[278],"surpasses":[287],"baselines":[289],"clear":[292],"margin.":[293]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-28T08:17:26.163206","created_date":"2025-10-10T00:00:00"}
