{"id":"https://openalex.org/W4413472233","doi":"https://doi.org/10.1109/tmm.2025.3599048","title":"Complementary and Contrastive Learning for Audio-Visual Segmentation","display_name":"Complementary and Contrastive Learning for Audio-Visual Segmentation","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4413472233","doi":"https://doi.org/10.1109/tmm.2025.3599048"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2025.3599048","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3599048","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Sitong Gong","orcid":"https://orcid.org/0009-0001-8661-1093"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sitong Gong","raw_affiliation_strings":["School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048521072","display_name":"Yunzhi Zhuge","orcid":"https://orcid.org/0000-0002-4288-4516"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunzhi Zhuge","raw_affiliation_strings":["School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Lu Zhang","orcid":"https://orcid.org/0000-0003-4648-4437"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lu Zhang","raw_affiliation_strings":["School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100451855","display_name":"Pingping Zhang","orcid":"https://orcid.org/0000-0003-1206-1444"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pingping Zhang","raw_affiliation_strings":["School of Future Technology, Dalian University of Technology, Dalian, China","School of Future Technology and the School of Artificial Intelligence, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Future Technology, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]},{"raw_affiliation_string":"School of Future Technology and the School of Artificial Intelligence, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006986293","display_name":"Huchuan Lu","orcid":"https://orcid.org/0000-0002-6668-9758"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huchuan Lu","raw_affiliation_strings":["School of Future Technology, Dalian University of Technology, Dalian, China","School of Future Technology and the School of Artificial Intelligence, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Future Technology, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]},{"raw_affiliation_string":"School of Future Technology and the School of Artificial Intelligence, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I27357992"],"apc_list":null,"apc_paid":null,"fwci":2.8257,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.91530995,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"27","issue":null,"first_page":"7407","last_page":"7418"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9825999736785889,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9771999716758728,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9420999884605408,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8475969433784485},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6102801561355591},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5725902318954468},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.5019090175628662},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4618360996246338},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4507552981376648},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40540438890457153},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.36700063943862915},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.356878399848938},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34742122888565063}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8475969433784485},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6102801561355591},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5725902318954468},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.5019090175628662},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4618360996246338},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4507552981376648},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40540438890457153},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.36700063943862915},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.356878399848938},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34742122888565063}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3599048","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3599048","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1358076444","display_name":null,"funder_award_id":"DUT24YG119","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G3080167161","display_name":null,"funder_award_id":"62206039","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4319081443","display_name":null,"funder_award_id":"DUT24RC(3)025","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G4585823974","display_name":null,"funder_award_id":"62441231","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6387696347","display_name":null,"funder_award_id":"62406053","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2117539524","https://openalex.org/W2194775991","https://openalex.org/W2526050071","https://openalex.org/W2588240745","https://openalex.org/W2593116425","https://openalex.org/W2810334075","https://openalex.org/W2885675825","https://openalex.org/W2907536214","https://openalex.org/W2962914239","https://openalex.org/W3015785290","https://openalex.org/W3096609285","https://openalex.org/W3108367559","https://openalex.org/W3154852953","https://openalex.org/W3171516518","https://openalex.org/W3175515048","https://openalex.org/W3182236906","https://openalex.org/W3187885408","https://openalex.org/W3214311327","https://openalex.org/W4225575592","https://openalex.org/W4226024706","https://openalex.org/W4291920479","https://openalex.org/W4312815172","https://openalex.org/W4312926266","https://openalex.org/W4367146821","https://openalex.org/W4385767885","https://openalex.org/W4386071535","https://openalex.org/W4386072368","https://openalex.org/W4387682108","https://openalex.org/W4387969495","https://openalex.org/W4390872419","https://openalex.org/W4390872864","https://openalex.org/W4390873537","https://openalex.org/W4393159092","https://openalex.org/W4393160420","https://openalex.org/W4393178550","https://openalex.org/W4395703093","https://openalex.org/W4399146361","https://openalex.org/W4399939183","https://openalex.org/W4402703108","https://openalex.org/W4402733565","https://openalex.org/W4402754134","https://openalex.org/W4402916449","https://openalex.org/W4403386295","https://openalex.org/W4403758862","https://openalex.org/W4403889737","https://openalex.org/W4406658392","https://openalex.org/W4411550739"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W4245955731","https://openalex.org/W1522196789"],"abstract_inverted_index":{"Audio-Visual":[0],"Segmentation":[1],"(AVS)":[2],"aims":[3],"to":[4,69,79,137,185],"generate":[5],"pixel-wise":[6],"segmentation":[7,31],"maps":[8],"that":[9,124],"correlate":[10],"with":[11,23,118,134,166],"the":[12,30,93,119,143,149,156,171,180,187,193,198,211],"auditory":[13,62],"signals":[14],"of":[15,151,201],"objects.":[16],"This":[17],"field":[18],"has":[19],"seen":[20],"significant":[21],"progress":[22],"numerous":[24],"CNN":[25,36],"and":[26,33,46,83,95,107,110,147,169,173,214],"Transformer-based":[27,59],"methods":[28,60],"enhancing":[29],"accuracy":[32],"robustness.":[34],"Traditional":[35],"approaches":[37],"manage":[38],"audio-visual":[39,71],"interactions":[40],"through":[41],"basic":[42],"operations":[43],"like":[44],"padding":[45],"multiplications":[47],"but":[48],"are":[49],"restricted":[50],"by":[51],"CNNs'":[52],"limited":[53],"local":[54,106],"receptive":[55],"field.":[56],"More":[57],"recently,":[58],"treat":[61],"cues":[63],"as":[64],"queries,":[65],"utilizing":[66],"attention":[67],"mechanisms":[68],"enhance":[70],"cooperation":[72],"within":[73],"frames.":[74],"Nevertheless,":[75],"they":[76],"typically":[77],"struggle":[78],"extract":[80,142],"multimodal":[81],"coefficients":[82],"temporal":[84,152],"dynamics":[85],"adequately.":[86],"To":[87,141],"overcome":[88],"these":[89],"limitations,":[90],"we":[91,154,178],"present":[92],"Complementary":[94],"Contrastive":[96,182],"Transformer":[97,158],"(CCFormer),":[98],"a":[99,126],"novel":[100],"framework":[101],"adept":[102],"at":[103],"processing":[104],"both":[105,190],"global":[108],"information":[109],"capturing":[111],"spatial-temporal":[112],"context":[113],"comprehensively.":[114],"Our":[115],"CCFormer":[116],"initiates":[117],"Early":[120],"Integration":[121],"Module":[122,159],"(EIM)":[123],"employs":[125],"parallel":[127],"bilateral":[128],"architecture,":[129],"merging":[130],"multi-scale":[131],"visual":[132],"features":[133,146],"audio":[135,164],"data":[136],"boost":[138],"cross-modal":[139],"complementarity.":[140],"intra-frame":[144],"spatial":[145],"facilitate":[148],"perception":[150],"coherence,":[153],"introduce":[155],"Multi-query":[157],"(MTM),":[160],"which":[161],"dynamically":[162],"endows":[163],"queries":[165],"learning":[167],"capabilities":[168],"models":[170],"frame":[172],"video-level":[174],"relations":[175],"simultaneously.":[176],"Furthermore,":[177],"propose":[179],"Bi-modal":[181],"Learning":[183],"(BCL)":[184],"promote":[186],"alignment":[188],"across":[189,210],"modalities":[191],"in":[192],"unified":[194],"feature":[195],"space.":[196],"Through":[197],"effective":[199],"combination":[200],"those":[202],"designs,":[203],"our":[204],"method":[205],"sets":[206],"new":[207],"state-of-the-art":[208],"benchmarks":[209],"S4,":[212],"MS3":[213],"AVSS":[215],"datasets.":[216]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
