{"id":"https://openalex.org/W4226383576","doi":"https://doi.org/10.1109/access.2022.3164745","title":"Comparing Learning Methodologies for Self-Supervised Audio-Visual Representation Learning","display_name":"Comparing Learning Methodologies for Self-Supervised Audio-Visual Representation Learning","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4226383576","doi":"https://doi.org/10.1109/access.2022.3164745"},"language":"en","primary_location":{"id":"doi:10.1109/access.2022.3164745","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3164745","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9668973/09749114.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/9668973/09749114.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046840969","display_name":"Hacene Terbouche","orcid":"https://orcid.org/0000-0002-2217-4525"},"institutions":[{"id":"https://openalex.org/I197681013","display_name":"Universit\u00e9 Paris-Est Cr\u00e9teil","ror":"https://ror.org/05ggc9x40","country_code":"FR","type":"education","lineage":["https://openalex.org/I197681013"]},{"id":"https://openalex.org/I2800365227","display_name":"Paris-Est Sup","ror":"https://ror.org/0268ecp52","country_code":"FR","type":"education","lineage":["https://openalex.org/I2800365227"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Hacene Terbouche","raw_affiliation_strings":["LISSI, UPEC, Universit\u00e9 Paris-Est, Vitry-sur-Seine, France","Powder AI Research, Paris, France"],"raw_orcid":"https://orcid.org/0000-0002-2217-4525","affiliations":[{"raw_affiliation_string":"LISSI, UPEC, Universit\u00e9 Paris-Est, Vitry-sur-Seine, France","institution_ids":["https://openalex.org/I2800365227","https://openalex.org/I197681013"]},{"raw_affiliation_string":"Powder AI Research, Paris, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025798902","display_name":"Liam Schoneveld","orcid":"https://orcid.org/0000-0002-7324-6234"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liam Schoneveld","raw_affiliation_strings":["Powder AI Research, Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Powder AI Research, Paris, France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064826883","display_name":"Oisin Benson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oisin Benson","raw_affiliation_strings":["Powder AI Research, Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Powder AI Research, Paris, France","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088943819","display_name":"Alice Othmani","orcid":"https://orcid.org/0000-0002-3442-0578"},"institutions":[{"id":"https://openalex.org/I197681013","display_name":"Universit\u00e9 Paris-Est Cr\u00e9teil","ror":"https://ror.org/05ggc9x40","country_code":"FR","type":"education","lineage":["https://openalex.org/I197681013"]},{"id":"https://openalex.org/I2800365227","display_name":"Paris-Est Sup","ror":"https://ror.org/0268ecp52","country_code":"FR","type":"education","lineage":["https://openalex.org/I2800365227"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Alice Othmani","raw_affiliation_strings":["LISSI, UPEC, Universit\u00e9 Paris-Est, Vitry-sur-Seine, France"],"raw_orcid":"https://orcid.org/0000-0002-3442-0578","affiliations":[{"raw_affiliation_string":"LISSI, UPEC, Universit\u00e9 Paris-Est, Vitry-sur-Seine, France","institution_ids":["https://openalex.org/I2800365227","https://openalex.org/I197681013"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.6351,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.82859525,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"10","issue":null,"first_page":"41622","last_page":"41638"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8254967927932739},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6380693912506104},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6220546960830688},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.5228893160820007},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5065239667892456},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.45149654150009155},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.4378800690174103},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.19676876068115234}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8254967927932739},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6380693912506104},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6220546960830688},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.5228893160820007},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5065239667892456},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.45149654150009155},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.4378800690174103},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.19676876068115234},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/access.2022.3164745","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3164745","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9668973/09749114.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-04317315v1","is_oa":false,"landing_page_url":"https://hal.u-pec.fr/hal-04317315","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, 2022, 10, pp.41622-41638. &#x27E8;10.1109/ACCESS.2022.3164745&#x27E9;","raw_type":"Journal articles"},{"id":"pmh:oai:doaj.org/article:feffa8ffd6de4611ad95928fbac35090","is_oa":true,"landing_page_url":"https://doaj.org/article/feffa8ffd6de4611ad95928fbac35090","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 10, Pp 41622-41638 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2022.3164745","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2022.3164745","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9668973/09749114.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6700000166893005,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4226383576.pdf","grobid_xml":"https://content.openalex.org/works/W4226383576.grobid-xml"},"referenced_works_count":97,"referenced_works":["https://openalex.org/W343636949","https://openalex.org/W764651262","https://openalex.org/W1614298861","https://openalex.org/W1924343884","https://openalex.org/W2100495367","https://openalex.org/W2108598243","https://openalex.org/W2116435618","https://openalex.org/W2144796873","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2321533354","https://openalex.org/W2326925005","https://openalex.org/W2487442924","https://openalex.org/W2493916176","https://openalex.org/W2526050071","https://openalex.org/W2529272619","https://openalex.org/W2550462002","https://openalex.org/W2593116425","https://openalex.org/W2615413256","https://openalex.org/W2757910899","https://openalex.org/W2785325870","https://openalex.org/W2842511635","https://openalex.org/W2883725317","https://openalex.org/W2889467522","https://openalex.org/W2890052321","https://openalex.org/W2896457183","https://openalex.org/W2949099979","https://openalex.org/W2951292523","https://openalex.org/W2962852342","https://openalex.org/W2962931121","https://openalex.org/W2963420272","https://openalex.org/W2963426332","https://openalex.org/W2963814513","https://openalex.org/W2964074409","https://openalex.org/W2986405467","https://openalex.org/W2990408345","https://openalex.org/W3005680577","https://openalex.org/W3014895431","https://openalex.org/W3023371261","https://openalex.org/W3034381931","https://openalex.org/W3035060554","https://openalex.org/W3035118106","https://openalex.org/W3035524453","https://openalex.org/W3036224891","https://openalex.org/W3094454579","https://openalex.org/W3110306138","https://openalex.org/W3134652006","https://openalex.org/W3140854437","https://openalex.org/W3145385912","https://openalex.org/W3159481202","https://openalex.org/W3169064633","https://openalex.org/W3171007011","https://openalex.org/W3174331338","https://openalex.org/W3175300676","https://openalex.org/W4206471589","https://openalex.org/W4229494842","https://openalex.org/W4288024349","https://openalex.org/W4292779060","https://openalex.org/W4297772798","https://openalex.org/W4297808394","https://openalex.org/W4301383669","https://openalex.org/W6636510571","https://openalex.org/W6677326919","https://openalex.org/W6678242812","https://openalex.org/W6681255678","https://openalex.org/W6682948231","https://openalex.org/W6682962330","https://openalex.org/W6685380521","https://openalex.org/W6687483927","https://openalex.org/W6691096134","https://openalex.org/W6723250868","https://openalex.org/W6726983635","https://openalex.org/W6727253422","https://openalex.org/W6730018477","https://openalex.org/W6730323794","https://openalex.org/W6738465933","https://openalex.org/W6744513255","https://openalex.org/W6747899497","https://openalex.org/W6754048563","https://openalex.org/W6755207826","https://openalex.org/W6756485162","https://openalex.org/W6758354414","https://openalex.org/W6764266060","https://openalex.org/W6764350350","https://openalex.org/W6770717842","https://openalex.org/W6770805772","https://openalex.org/W6774314701","https://openalex.org/W6776441977","https://openalex.org/W6778883912","https://openalex.org/W6779326418","https://openalex.org/W6779997284","https://openalex.org/W6781217715","https://openalex.org/W6784660784","https://openalex.org/W6786614245","https://openalex.org/W6790830454","https://openalex.org/W6791742336","https://openalex.org/W6797631482"],"related_works":["https://openalex.org/W2494338568","https://openalex.org/W1495042958","https://openalex.org/W2122678784","https://openalex.org/W2518241345","https://openalex.org/W4390062853","https://openalex.org/W4389256085","https://openalex.org/W4399290976","https://openalex.org/W4285328440","https://openalex.org/W4313644201","https://openalex.org/W2515319207"],"abstract_inverted_index":{"In":[0,30],"recent":[1],"years,":[2],"the":[3,131],"machine":[4],"learning":[5,40,136],"community":[6],"has":[7,21],"devoted":[8],"an":[9],"increasing":[10],"attention":[11],"to":[12,71],"self-supervised":[13,20,35,135],"learning.The":[14],"performance":[15],"gap":[16],"between":[17],"supervised":[18],"and":[19,61,69,93],"become":[22],"increasingly":[23],"narrow":[24],"in":[25,137],"many":[26],"computer":[27],"vision":[28],"applications.":[29],"this":[31],"paper,":[32],"a":[33,55,65,102,138],"new":[34,103],"approach":[36,50,98],"is":[37,99],"proposed":[38,97],"for":[39],"audio-visual":[41],"representations":[42,53,75],"from":[43,110],"large":[44],"databases":[45],"of":[46,57,107,134],"unlabeled":[47],"videos.":[48],"Our":[49],"learns":[51,70],"its":[52,73,77],"by":[54],"combination":[56],"two":[58],"tasks:":[59],"unimodal":[60],"cross-modal.":[62],"It":[63],"uses":[64],"future":[66],"prediction":[67],"task,":[68],"align":[72],"visual":[74],"with":[76],"corresponding":[78],"audio":[79],"representations.":[80],"To":[81],"implement":[82],"these":[83],"tasks,":[84,124],"three":[85],"methodologies":[86],"are":[87],"assessed:":[88],"contrastive":[89],"learning,":[90],"prototypical":[91],"constrasting":[92],"redundancy":[94],"reduction.":[95],"The":[96],"evaluated":[100],"on":[101],"publicly":[104],"available":[105],"dataset":[106],"videos":[108],"captured":[109],"video":[111],"game":[112],"gameplay":[113],"footage,":[114],"called":[115],"<italic":[116],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[117],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Videogame":[118],"DB</i>":[119],".":[120],"On":[121],"most":[122],"downstream":[123],"our":[125],"method":[126],"significantly":[127],"outperforms":[128],"baselines,":[129],"demonstrating":[130],"real":[132],"benefits":[133],"real-world":[139],"application.":[140]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
