{"id":"https://openalex.org/W4285606400","doi":"https://doi.org/10.24963/ijcai.2022/506","title":"Active Contrastive Set Mining for Robust Audio-Visual Instance Discrimination","display_name":"Active Contrastive Set Mining for Robust Audio-Visual Instance Discrimination","publication_year":2022,"publication_date":"2022-07-01","ids":{"openalex":"https://openalex.org/W4285606400","doi":"https://doi.org/10.24963/ijcai.2022/506"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2022/506","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2022/506","pdf_url":"https://www.ijcai.org/proceedings/2022/0506.pdf","source":{"id":"https://openalex.org/S4363608755","display_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.ijcai.org/proceedings/2022/0506.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011876855","display_name":"Hanyu Xuan","orcid":"https://orcid.org/0000-0002-4633-2794"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hanyu Xuan","raw_affiliation_strings":["Nanjing University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100297988","display_name":"Yihong Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Yihong Xu","raw_affiliation_strings":["INRIA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"INRIA","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100366889","display_name":"Shuo Chen","orcid":"https://orcid.org/0000-0003-3848-3397"},"institutions":[{"id":"https://openalex.org/I4210110652","display_name":"RIKEN","ror":"https://ror.org/01sjwvz98","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652"]},{"id":"https://openalex.org/I4210158631","display_name":"RIKEN BNL Research Center","ror":"https://ror.org/05db1vq90","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I200870766","https://openalex.org/I39565521","https://openalex.org/I4210110652","https://openalex.org/I4210142672","https://openalex.org/I4210158631"]}],"countries":["JP","US"],"is_corresponding":false,"raw_author_name":"Shuo Chen","raw_affiliation_strings":["RIKEN","RIKEN BNL Research Center"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RIKEN","institution_ids":["https://openalex.org/I4210110652"]},{"raw_affiliation_string":"RIKEN BNL Research Center","institution_ids":["https://openalex.org/I4210158631"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001023262","display_name":"Zhiliang Wu","orcid":"https://orcid.org/0000-0002-6597-8048"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiliang Wu","raw_affiliation_strings":["Nanjing University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106667216","display_name":"Jian Yang","orcid":"https://orcid.org/0000-0003-2257-2432"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Yang","raw_affiliation_strings":["Nanjing University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100395042","display_name":"Yan Yan","orcid":"https://orcid.org/0000-0002-0187-7010"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yan Yan","raw_affiliation_strings":["Illinois Institute of Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066621495","display_name":"Xavier Alameda-Pineda","orcid":"https://orcid.org/0000-0002-5354-1084"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Xavier Alameda-Pineda","raw_affiliation_strings":["INRIA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"INRIA","institution_ids":["https://openalex.org/I1326498283"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5011876855"],"corresponding_institution_ids":["https://openalex.org/I36399199"],"apc_list":null,"apc_paid":null,"fwci":0.2421,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.36795719,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"3643","last_page":"3649"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.828475832939148},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6144098043441772},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.5884762406349182},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5637915134429932},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5036394000053406},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4418952763080597},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.4256688952445984},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4136062264442444},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.41128474473953247},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3521919846534729},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.1400994062423706}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.828475832939148},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6144098043441772},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.5884762406349182},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5637915134429932},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5036394000053406},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4418952763080597},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.4256688952445984},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4136062264442444},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.41128474473953247},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3521919846534729},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.1400994062423706},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.24963/ijcai.2022/506","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2022/506","pdf_url":"https://www.ijcai.org/proceedings/2022/0506.pdf","source":{"id":"https://openalex.org/S4363608755","display_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-03906947v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-03906947","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IJCAI 2022 - 31st International Joint Conference on Artificial Intelligence, Jul 2022, Vienna, Austria. pp.3643-3649, &#x27E8;10.24963/ijcai.2022/506&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2022/506","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2022/506","pdf_url":"https://www.ijcai.org/proceedings/2022/0506.pdf","source":{"id":"https://openalex.org/S4363608755","display_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.46000000834465027,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G2092057362","display_name":null,"funder_award_id":"19-P3IA-0003","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"}],"funders":[{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4285606400.pdf","grobid_xml":"https://content.openalex.org/works/W4285606400.grobid-xml"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W1972567154","https://openalex.org/W2103104224","https://openalex.org/W2152790380","https://openalex.org/W2487442924","https://openalex.org/W2745357708","https://openalex.org/W2796992393","https://openalex.org/W2798991696","https://openalex.org/W2883451034","https://openalex.org/W2890052321","https://openalex.org/W2926645869","https://openalex.org/W2948242301","https://openalex.org/W2951585248","https://openalex.org/W2963155035","https://openalex.org/W2963365341","https://openalex.org/W2964037671","https://openalex.org/W2975357369","https://openalex.org/W2990408345","https://openalex.org/W2997909293","https://openalex.org/W3005680577","https://openalex.org/W3009561768","https://openalex.org/W3010874390","https://openalex.org/W3012519163","https://openalex.org/W3034576826","https://openalex.org/W3035060554","https://openalex.org/W3035524453","https://openalex.org/W3108655343","https://openalex.org/W3114632476","https://openalex.org/W3123709248","https://openalex.org/W3171007011","https://openalex.org/W3175300676","https://openalex.org/W3190580390","https://openalex.org/W4226314236","https://openalex.org/W4287608901","https://openalex.org/W4287812705","https://openalex.org/W4289294839","https://openalex.org/W4293665662","https://openalex.org/W4398958419"],"related_works":["https://openalex.org/W2366107444","https://openalex.org/W4388145910","https://openalex.org/W2381570729","https://openalex.org/W1976205134","https://openalex.org/W4248336175","https://openalex.org/W2271369634","https://openalex.org/W2031260042","https://openalex.org/W2391445434","https://openalex.org/W3147472394","https://openalex.org/W2047100085"],"abstract_inverted_index":{"The":[0,130],"recent":[1,138],"success":[2],"of":[3,15,84,164],"audio-visual":[4,16,38],"representation":[5],"learning":[6],"can":[7,19],"be":[8,20],"largely":[9],"attributed":[10],"to":[11,36,105],"their":[12,145],"pervasive":[13],"property":[14],"synchronization,":[17],"which":[18],"used":[21],"as":[22],"self-annotated":[23],"supervision.":[24],"As":[25],"a":[26,81,96,122],"state-of-the-art":[27,139],"solution,":[28],"Audio-Visual":[29],"Instance":[30],"Discrimination":[31],"(AVID)":[32],"extends":[33],"instance":[34],"discrimination":[35],"the":[37,44,52,55,76,107,160],"realm.":[39],"Existing":[40],"AVID":[41,140],"methods":[42,141],"construct":[43],"contrastive":[45,78,108],"set":[46],"by":[47,94],"random":[48],"sampling":[49],"based":[50],"on":[51,150,156],"assumption":[53,72],"that":[54,70,103],"audio":[56],"and":[57,112,142,153],"visual":[58],"clips":[59],"from":[60],"all":[61],"other":[62],"videos":[63],"are":[64],"not":[65],"semantically":[66],"related.":[67],"We":[68],"argue":[69],"this":[71,88,92],"is":[73,133],"rough,":[74],"since":[75],"resulting":[77],"sets":[79,109],"have":[80],"large":[82],"number":[83],"faulty":[85],"negatives.":[86],"In":[87],"paper,":[89],"we":[90,119],"overcome":[91],"limitation":[93],"proposing":[95],"novel":[97],"Active":[98],"Contrastive":[99],"Set":[100],"Mining":[101],"(ACSM)":[102],"aims":[104],"mine":[106],"with":[110],"informative":[111],"diverse":[113],"negatives":[114],"for":[115],"robust":[116],"AVID.":[117],"Moreover,":[118],"also":[120],"integrate":[121],"semantically-aware":[123],"hard-sample":[124],"mining":[125],"strategy":[126],"into":[127,135],"our":[128,165],"ACSM.":[129],"proposed":[131],"ACSM":[132],"implemented":[134],"two":[136],"most":[137],"significantly":[143],"improves":[144],"performance.":[146],"Extensive":[147],"experiments":[148],"conducted":[149],"both":[151],"action":[152],"sound":[154],"recognition":[155],"multiple":[157],"datasets":[158],"show":[159],"remarkably":[161],"improved":[162],"performance":[163],"method.":[166]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2022-07-16T00:00:00"}
