{"id":"https://openalex.org/W4405717843","doi":"https://doi.org/10.1109/tmm.2024.3521746","title":"Cross-Modal Cognitive Consensus Guided Audio\u2013Visual Segmentation","display_name":"Cross-Modal Cognitive Consensus Guided Audio\u2013Visual Segmentation","publication_year":2024,"publication_date":"2024-12-23","ids":{"openalex":"https://openalex.org/W4405717843","doi":"https://doi.org/10.1109/tmm.2024.3521746"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3521746","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3521746","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019769019","display_name":"Zhaofeng Shi","orcid":"https://orcid.org/0000-0001-6313-8670"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhaofeng Shi","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075737786","display_name":"Qingbo Wu","orcid":"https://orcid.org/0000-0003-2936-6340"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingbo Wu","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100617043","display_name":"Fanman Meng","orcid":"https://orcid.org/0000-0002-3016-2567"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fanman Meng","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044954967","display_name":"Linfeng Xu","orcid":"https://orcid.org/0000-0002-9934-0958"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linfeng Xu","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114378292","display_name":"Hongliang Li","orcid":"https://orcid.org/0000-0002-7481-095X"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongliang Li","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5019769019"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":1.369,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.82699237,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"27","issue":null,"first_page":"209","last_page":"223"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.833625078201294},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.6392587423324585},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5158785581588745},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5058479309082031},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.464181125164032},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4224473834037781},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3797217905521393},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.33063095808029175},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3163548409938812}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.833625078201294},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.6392587423324585},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5158785581588745},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5058479309082031},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.464181125164032},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4224473834037781},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3797217905521393},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.33063095808029175},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3163548409938812},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3521746","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3521746","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1864209190","display_name":null,"funder_award_id":"2021ZD0112001","funder_id":"https://openalex.org/F4320329860","funder_display_name":"National Science and Technology Major Project"},{"id":"https://openalex.org/G2459130074","display_name":null,"funder_award_id":"U23A20286","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8366541121","display_name":null,"funder_award_id":"2023NSFSC1972","funder_id":"https://openalex.org/F4320329861","funder_display_name":"Natural Science Foundation of Sichuan Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null},{"id":"https://openalex.org/F4320329861","display_name":"Natural Science Foundation of Sichuan Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":120,"referenced_works":["https://openalex.org/W1499991161","https://openalex.org/W1507506748","https://openalex.org/W1903029394","https://openalex.org/W2030543058","https://openalex.org/W2117539524","https://openalex.org/W2171061940","https://openalex.org/W2187051054","https://openalex.org/W2250539671","https://openalex.org/W2251264718","https://openalex.org/W2412782625","https://openalex.org/W2526050071","https://openalex.org/W2560023338","https://openalex.org/W2564998703","https://openalex.org/W2593116425","https://openalex.org/W2604379605","https://openalex.org/W2619697695","https://openalex.org/W2765793020","https://openalex.org/W2799157347","https://openalex.org/W2889658408","https://openalex.org/W2890853604","https://openalex.org/W2911379778","https://openalex.org/W2916797271","https://openalex.org/W2955084925","https://openalex.org/W2957408986","https://openalex.org/W2962739339","https://openalex.org/W2962858109","https://openalex.org/W2962893388","https://openalex.org/W2963091558","https://openalex.org/W2963115079","https://openalex.org/W2963150697","https://openalex.org/W2963253279","https://openalex.org/W2963354481","https://openalex.org/W2963383962","https://openalex.org/W2963503215","https://openalex.org/W2963680395","https://openalex.org/W2963857746","https://openalex.org/W2963912736","https://openalex.org/W2964061809","https://openalex.org/W2964226882","https://openalex.org/W2965182628","https://openalex.org/W2982619606","https://openalex.org/W2983693499","https://openalex.org/W2983965928","https://openalex.org/W2999219213","https://openalex.org/W3015371781","https://openalex.org/W3016000368","https://openalex.org/W3024979138","https://openalex.org/W3034804856","https://openalex.org/W3036328963","https://openalex.org/W3048939150","https://openalex.org/W3076947077","https://openalex.org/W3088227529","https://openalex.org/W3089887959","https://openalex.org/W3094550259","https://openalex.org/W3094664776","https://openalex.org/W3104844437","https://openalex.org/W3108367559","https://openalex.org/W3116298410","https://openalex.org/W3127947687","https://openalex.org/W3138516171","https://openalex.org/W3153167542","https://openalex.org/W3169318522","https://openalex.org/W3170088426","https://openalex.org/W3170410843","https://openalex.org/W3170511209","https://openalex.org/W3170630188","https://openalex.org/W3170841864","https://openalex.org/W3174459789","https://openalex.org/W3175515048","https://openalex.org/W3197715576","https://openalex.org/W3199130398","https://openalex.org/W3215180973","https://openalex.org/W3215899623","https://openalex.org/W3217061668","https://openalex.org/W4200631575","https://openalex.org/W4226024706","https://openalex.org/W4293363567","https://openalex.org/W4295046678","https://openalex.org/W4303578909","https://openalex.org/W4304084115","https://openalex.org/W4307977675","https://openalex.org/W4310255405","https://openalex.org/W4312420092","https://openalex.org/W4312616451","https://openalex.org/W4312655926","https://openalex.org/W4312690830","https://openalex.org/W4312762144","https://openalex.org/W4312974570","https://openalex.org/W4313153210","https://openalex.org/W4366310326","https://openalex.org/W4387969090","https://openalex.org/W4387969151","https://openalex.org/W4390871756","https://openalex.org/W4390872864","https://openalex.org/W4390873204","https://openalex.org/W4393159092","https://openalex.org/W4393160420","https://openalex.org/W4395703093","https://openalex.org/W4399146361","https://openalex.org/W4402703118","https://openalex.org/W4402753794","https://openalex.org/W6636510571","https://openalex.org/W6746466744","https://openalex.org/W6755207826","https://openalex.org/W6756871312","https://openalex.org/W6766978945","https://openalex.org/W6782657842","https://openalex.org/W6783539077","https://openalex.org/W6791353385","https://openalex.org/W6793746569","https://openalex.org/W6797399245","https://openalex.org/W6803674551","https://openalex.org/W6809915981","https://openalex.org/W6810005275","https://openalex.org/W6810980340","https://openalex.org/W6840058269","https://openalex.org/W6848208918","https://openalex.org/W6853447796","https://openalex.org/W6854713258","https://openalex.org/W6881022228"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W4245955731","https://openalex.org/W2393726419"],"abstract_inverted_index":{"Audio-Visual":[0],"Segmentation":[1,197,204],"(AVS)":[2],"aims":[3],"to":[4,87,109,140,163,186],"extract":[5,141],"the":[6,48,56,72,90,111,115,123,159,164,168,182,187,193,208,212,215],"sounding":[7],"object":[8],"from":[9,114],"a":[10,17,62,102,131,142,173],"video":[11,27,73],"frame,":[12],"which":[13,46,85,180,218],"is":[14,138],"represented":[15],"by":[16,145],"pixel-wise":[18],"segmentation":[19],"mask":[20],"for":[21],"application":[22],"scenarios":[23],"such":[24],"as":[25,167],"multi-modal":[26],"editing,":[28],"augmented":[29],"reality,":[30],"and":[31,118,150,200],"intelligent":[32],"robot":[33],"systems.":[34],"The":[35],"pioneering":[36],"work":[37],"conducts":[38],"this":[39,98],"task":[40],"through":[41],"dense":[42],"feature-level":[43],"audio-visual":[44,112],"interaction,":[45],"ignores":[47],"dimension":[49,117],"gap":[50],"between":[51],"different":[52,80,95],"modalities.":[53],"More":[54],"specifically,":[55],"audio":[57],"clip":[58],"could":[59],"only":[60],"provide":[61],"<italic":[63,81],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[64,82],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Global</i>":[65],"semantic":[66,77],"label":[67,144,154,161],"in":[68],"each":[69],"sequence,":[70],"but":[71,93],"frame":[74],"covers":[75],"multiple":[76],"objects":[78],"across":[79],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Local</i>":[83],"regions,":[84],"leads":[86],"mislocalization":[88],"of":[89,152,207,214],"representationally":[91],"similar":[92],"semantically":[94],"object.":[96,189],"In":[97],"paper,":[99],"we":[100,157],"propose":[101],"Cross-modal":[103,132],"Cognitive":[104,133,174],"Consensus":[105,134,175],"guided":[106,176],"Network":[107],"(C3N)":[108],"align":[110],"semantics":[113],"global":[116],"progressively":[119],"inject":[120],"them":[121],"into":[122],"local":[124,183],"regions":[125],"via":[126,172],"an":[127],"attention":[128],"mechanism.":[129],"Firstly,":[130],"Inference":[135],"Module":[136,178],"(C3IM)":[137],"developed":[139],"unified-modal":[143,160],"integrating":[146],"audio/visual":[147],"classification":[148],"confidence":[149],"similarities":[151],"modality-agnostic":[153],"embeddings.":[155],"Then,":[156],"feed":[158],"back":[162],"visual":[165],"backbone":[166],"explicit":[169],"semantic-level":[170],"guidance":[171],"Attention":[177],"(CCAM),":[179],"highlights":[181],"features":[184],"corresponding":[185],"interested":[188],"Extensive":[190],"experiments":[191],"on":[192],"Single":[194],"Sound":[195,202],"Source":[196,203],"(S4)":[198],"setting":[199,206],"Multiple":[201],"(MS3)":[205],"AVSBench":[209],"dataset":[210],"demonstrate":[211],"effectiveness":[213],"proposed":[216],"method,":[217],"achieves":[219],"state-of-the-art":[220],"performance.":[221]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
