{"id":"https://openalex.org/W4387968656","doi":"https://doi.org/10.1145/3581783.3612428","title":"Bio-Inspired Audiovisual Multi-Representation Integration via Self-Supervised Learning","display_name":"Bio-Inspired Audiovisual Multi-Representation Integration via Self-Supervised Learning","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4387968656","doi":"https://doi.org/10.1145/3581783.3612428"},"language":"en","primary_location":{"id":"doi:10.1145/3581783.3612428","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3612428","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101652242","display_name":"Zhaojian Li","orcid":"https://orcid.org/0000-0001-6700-7010"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhaojian Li","raw_affiliation_strings":["Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101496244","display_name":"Bin Zhao","orcid":"https://orcid.org/0000-0002-0294-8538"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Zhao","raw_affiliation_strings":["Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100334740","display_name":"Yuan Yuan","orcid":null},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Yuan","raw_affiliation_strings":["Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101652242"],"corresponding_institution_ids":["https://openalex.org/I17145004"],"apc_list":null,"apc_paid":null,"fwci":0.5874,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.66027821,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"3755","last_page":"3764"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9786999821662903,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7733023166656494},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.6565890312194824},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6221569180488586},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6115557551383972},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5828803777694702},{"id":"https://openalex.org/keywords/multi-task-learning","display_name":"Multi-task learning","score":0.5680985450744629},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.5349205136299133},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.47213417291641235},{"id":"https://openalex.org/keywords/spatialization","display_name":"Spatialization","score":0.4610781669616699},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4548800587654114},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.4264988899230957},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.42630457878112793},{"id":"https://openalex.org/keywords/crossmodal","display_name":"Crossmodal","score":0.4114738702774048},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3663291335105896},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33865487575531006},{"id":"https://openalex.org/keywords/visual-perception","display_name":"Visual perception","score":0.3335615396499634}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7733023166656494},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.6565890312194824},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6221569180488586},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6115557551383972},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5828803777694702},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.5680985450744629},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.5349205136299133},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.47213417291641235},{"id":"https://openalex.org/C2777031145","wikidata":"https://www.wikidata.org/wiki/Q4430987","display_name":"Spatialization","level":2,"score":0.4610781669616699},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4548800587654114},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.4264988899230957},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.42630457878112793},{"id":"https://openalex.org/C60115397","wikidata":"https://www.wikidata.org/wiki/Q5188732","display_name":"Crossmodal","level":4,"score":0.4114738702774048},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3663291335105896},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33865487575531006},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.3335615396499634},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581783.3612428","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3612428","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7300000190734863,"display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G1195665351","display_name":null,"funder_award_id":"2020YFB2103900","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2210533046","display_name":null,"funder_award_id":"61825603","funder_id":"https://openalex.org/F4320336125","funder_display_name":"National Science Fund for Distinguished Young Scholars"},{"id":"https://openalex.org/G2513133394","display_name":null,"funder_award_id":"61825603","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2802911279","display_name":null,"funder_award_id":"Young","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4822847990","display_name":null,"funder_award_id":"62106183","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7410015349","display_name":null,"funder_award_id":"2020YFB2103900","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320336125","display_name":"National Science Fund for Distinguished Young Scholars","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2043969427","https://openalex.org/W2176754121","https://openalex.org/W2194775991","https://openalex.org/W2526050071","https://openalex.org/W2593116425","https://openalex.org/W2619697695","https://openalex.org/W2962865004","https://openalex.org/W2962960500","https://openalex.org/W2963115079","https://openalex.org/W2963680395","https://openalex.org/W2982619606","https://openalex.org/W2982624843","https://openalex.org/W3034742263","https://openalex.org/W3116298410","https://openalex.org/W3133500032","https://openalex.org/W3154852953","https://openalex.org/W3169318522","https://openalex.org/W3173382920","https://openalex.org/W3174854700","https://openalex.org/W3211965499","https://openalex.org/W4220770970","https://openalex.org/W4283709432","https://openalex.org/W4286378963","https://openalex.org/W4297793551","https://openalex.org/W4304083155","https://openalex.org/W4312444931","https://openalex.org/W4312498304","https://openalex.org/W4313048473","https://openalex.org/W4319300024"],"related_works":["https://openalex.org/W2237537322","https://openalex.org/W2950678851","https://openalex.org/W4301248618","https://openalex.org/W2165343651","https://openalex.org/W2242427765","https://openalex.org/W2075830955","https://openalex.org/W2343790552","https://openalex.org/W3165067022","https://openalex.org/W2895918973","https://openalex.org/W3111398917"],"abstract_inverted_index":{"Audiovisual":[0],"self-supervised":[1,108],"representation":[2,18,173,177],"learning":[3,52,111],"has":[4],"made":[5],"significant":[6],"strides":[7],"in":[8,32,39,74],"various":[9],"audiovisual":[10,43,81,91,109,140,195,213],"tasks.":[11],"Existing":[12],"methods":[13],"mostly":[14],"focus":[15],"on":[16,203],"single":[17],"modeling":[19],"between":[20,29,119,134,194],"audio":[21,75,122],"and":[22,76,101,121,127,137,165,191,212],"visual":[23,77,120,135],"modalities,":[24],"ignoring":[25],"the":[26,33,70,116,188,198],"complex":[27],"correspondence":[28,133,193],"them,":[30],"resulting":[31],"inability":[34],"to":[35,68,79,169,200],"execute":[36],"cross-modal":[37],"understanding":[38,187],"a":[40,107,162,166,171],"more":[41,179],"natural":[42],"scene.":[44],"Several":[45],"biological":[46],"studies":[47],"have":[48],"shown":[49],"that":[50,95,175,185],"human":[51],"is":[53,144],"influenced":[54],"by":[55,64,151],"multi-layered":[56],"synchronization":[57],"of":[58],"perception.":[59],"To":[60,130],"this":[61],"end,":[62],"inspired":[63],"biology,":[65],"we":[66,88,105,160],"argue":[67],"exploit":[69],"naturally":[71],"existing":[72],"relationships":[73],"modalities":[78,123,196],"learn":[80,170],"representations":[82],"under":[83],"multilayer":[84],"perceptual":[85,117],"integration.":[86],"Firstly,":[87],"introduce":[89],"an":[90,139],"multi-representation":[92,110],"pretext":[93],"task":[94],"integrates":[96],"semantic":[97],"consistency,":[98],"temporal":[99],"alignment,":[100],"spatial":[102,128,192],"correspondence.":[103],"Secondly,":[104],"propose":[106,161],"approach,":[112],"which":[113,146],"simultaneously":[114],"learns":[115],"relationship":[118],"at":[124,155],"semantic,":[125,189],"temporal,":[126,190],"levels.":[129,157],"establish":[131],"fine-grained":[132],"objects":[136,150],"sounds,":[138],"object":[141],"detection":[142],"module":[143],"proposed,":[145],"detects":[147],"potential":[148],"sounding":[149],"combining":[152],"unsupervised":[153],"knowledge":[154],"multiple":[156],"In":[158],"addition,":[159],"modality-wise":[163],"loss":[164,168],"task-wise":[167],"subspace-orthogonal":[172],"space":[174],"makes":[176],"relations":[178],"discriminative.":[180],"Finally,":[181],"experimental":[182],"results":[183],"demonstrate":[184],"collectively":[186],"enables":[197],"model":[199],"perform":[201],"better":[202],"downstream":[204],"tasks":[205],"such":[206],"as":[207],"sound":[208,210],"separation,":[209],"spatialization,":[211],"segmentation.":[214]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
