{"id":"https://openalex.org/W4399146361","doi":"https://doi.org/10.1109/tmm.2024.3405622","title":"BAVS: Bootstrapping Audio-Visual Segmentation by Integrating Foundation Knowledge","display_name":"BAVS: Bootstrapping Audio-Visual Segmentation by Integrating Foundation Knowledge","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4399146361","doi":"https://doi.org/10.1109/tmm.2024.3405622"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3405622","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3405622","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100322208","display_name":"Chen Liu","orcid":"https://orcid.org/0000-0003-3159-0034"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Chen Liu","raw_affiliation_strings":["University of Queensland, St. Lucia, QLD, Australia"],"affiliations":[{"raw_affiliation_string":"University of Queensland, St. Lucia, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032941294","display_name":"Peike Li","orcid":"https://orcid.org/0000-0003-1809-2137"},"institutions":[{"id":"https://openalex.org/I4210158408","display_name":"Matrix Research (United States)","ror":"https://ror.org/04mw0p229","country_code":"US","type":"company","lineage":["https://openalex.org/I4210158408"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peike Li","raw_affiliation_strings":["MatrixVerse AI, Sydney, NSW, Australia","Matrix Verse, USA"],"affiliations":[{"raw_affiliation_string":"MatrixVerse AI, Sydney, NSW, Australia","institution_ids":[]},{"raw_affiliation_string":"Matrix Verse, USA","institution_ids":["https://openalex.org/I4210158408"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115580345","display_name":"Hu Zhang","orcid":"https://orcid.org/0009-0009-9892-9515"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Hu Zhang","raw_affiliation_strings":["University of Queensland, St. Lucia, QLD, Australia"],"affiliations":[{"raw_affiliation_string":"University of Queensland, St. Lucia, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019036797","display_name":"Lincheng Li","orcid":"https://orcid.org/0000-0002-6047-0472"},"institutions":[{"id":"https://openalex.org/I4210091137","display_name":"NetEase (China)","ror":"https://ror.org/00fp6fj05","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210091137"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lincheng Li","raw_affiliation_strings":["NetEase Fuxi AI Lab, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"NetEase Fuxi AI Lab, Hangzhou, China","institution_ids":["https://openalex.org/I4210091137"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078170935","display_name":"Zi Huang","orcid":"https://orcid.org/0000-0002-9738-4949"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zi Huang","raw_affiliation_strings":["University of Queensland, St. Lucia, QLD, Australia"],"affiliations":[{"raw_affiliation_string":"University of Queensland, St. Lucia, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053719139","display_name":"Dadong Wang","orcid":"https://orcid.org/0000-0003-0409-2259"},"institutions":[{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"funder","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Dadong Wang","raw_affiliation_strings":["CSIRO, Data61, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"CSIRO, Data61, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003076238","display_name":"Xin Yu","orcid":"https://orcid.org/0000-0002-0269-5649"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xin Yu","raw_affiliation_strings":["University of Queensland, St. Lucia, QLD, Australia"],"affiliations":[{"raw_affiliation_string":"University of Queensland, St. Lucia, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100322208"],"corresponding_institution_ids":["https://openalex.org/I165143802"],"apc_list":null,"apc_paid":null,"fwci":9.1654,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.98694503,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"26","issue":null,"first_page":"10015","last_page":"10028"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9557999968528748,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8309372663497925},{"id":"https://openalex.org/keywords/bootstrapping","display_name":"Bootstrapping (finance)","score":0.7857517004013062},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.7114678621292114},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.664108395576477},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6031826734542847},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4474242329597473},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44157612323760986},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3825303912162781},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3085779547691345},{"id":"https://openalex.org/keywords/finance","display_name":"Finance","score":0.06938859820365906}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8309372663497925},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.7857517004013062},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.7114678621292114},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.664108395576477},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6031826734542847},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4474242329597473},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44157612323760986},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3825303912162781},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3085779547691345},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.06938859820365906},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3405622","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3405622","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2747171834","display_name":null,"funder_award_id":"50092128","funder_id":"https://openalex.org/F4320320386","funder_display_name":"Commonwealth Scientific and Industrial Research Organisation"}],"funders":[{"id":"https://openalex.org/F4320320386","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1901129140","https://openalex.org/W2004244725","https://openalex.org/W2030445831","https://openalex.org/W2031489346","https://openalex.org/W2065274193","https://openalex.org/W2102811829","https://openalex.org/W2117539524","https://openalex.org/W2140647972","https://openalex.org/W2194775991","https://openalex.org/W2593116425","https://openalex.org/W2918984654","https://openalex.org/W2955058313","https://openalex.org/W2962914239","https://openalex.org/W2963150697","https://openalex.org/W2963351448","https://openalex.org/W2963680395","https://openalex.org/W2964011431","https://openalex.org/W2969987364","https://openalex.org/W3015371781","https://openalex.org/W3093418761","https://openalex.org/W3104749918","https://openalex.org/W3131500599","https://openalex.org/W3138516171","https://openalex.org/W3165745140","https://openalex.org/W3170088426","https://openalex.org/W3175514052","https://openalex.org/W3213165621","https://openalex.org/W3214311327","https://openalex.org/W4210416950","https://openalex.org/W4213271552","https://openalex.org/W4221167476","https://openalex.org/W4224925617","https://openalex.org/W4296594718","https://openalex.org/W4309660795","https://openalex.org/W4312048190","https://openalex.org/W4312653918","https://openalex.org/W4312815172","https://openalex.org/W4319300806","https://openalex.org/W4323556857","https://openalex.org/W4324319985","https://openalex.org/W4367279925","https://openalex.org/W4386071792","https://openalex.org/W4387969495","https://openalex.org/W4390872864","https://openalex.org/W4399206662","https://openalex.org/W4403386295","https://openalex.org/W6631190155","https://openalex.org/W6691431627","https://openalex.org/W6786183582","https://openalex.org/W6798837711","https://openalex.org/W6809915981","https://openalex.org/W6843326755","https://openalex.org/W6848208918","https://openalex.org/W6850480200","https://openalex.org/W6851397930"],"related_works":["https://openalex.org/W1534274833","https://openalex.org/W3117246195","https://openalex.org/W156620619","https://openalex.org/W2616249226","https://openalex.org/W2098233217","https://openalex.org/W2914363205","https://openalex.org/W2997844990","https://openalex.org/W1598221548","https://openalex.org/W2081850291","https://openalex.org/W1522196789"],"abstract_inverted_index":{"Given":[0],"an":[1,24,122,190,204],"audio-visual":[2,4,85,119,191,205,234],"pair,":[3],"segmentation":[5,86,115,132],"(AVS)":[6],"aims":[7],"to":[8,104,134,158,196],"locate":[9],"sounding":[10,137],"sources":[11],"by":[12,88,116,145,167,231],"predicting":[13],"pixel-wise":[14],"maps.":[15],"Previous":[16],"methods":[17],"assume":[18],"that":[19,39],"each":[20],"sound":[21,75],"component":[22],"in":[23,32,50,114,121,184,256],"audio":[25,48,64,147,155,160,164,169,178,229],"signal":[26],"always":[27],"has":[28],"a":[29,59,82,98,131,153],"visual":[30,66,140],"counterpart":[31],"the":[33,47,106,118,126,163,168,185,198,209,220,224,233,247],"image.":[34],"However,":[35],"this":[36,78],"assumption":[37],"overlooks":[38],"off-screen":[40,112],"sounds":[41,113,213],"and":[42,65,71,214,227],"background":[43,109,259],"noise":[44,110],"often":[45],"contaminate":[46],"recordings":[49],"real-world":[51],"scenarios.":[52],"They":[53],"impose":[54],"significant":[55],"challenges":[56],"on":[57,208,252],"building":[58],"consistent":[60],"semantic":[61,192],"mapping":[62],"between":[63,212,223],"signals":[67],"for":[68],"AVS":[69,253],"models":[70],"thus":[72],"impede":[73],"precise":[74],"localization.":[76],"In":[77,97,125],"work,":[79],"we":[80,129,150,188,202,238],"propose":[81],"two-stage":[83],"bootstrapping":[84],"framework":[87],"incorporating":[89],"multi-modal":[90],"foundation":[91,154,170],"knowledge":[92],"<inline-formula":[93],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[94,266],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[95],"notation=\"LaTeX\">$^{1}$</tex-math></inline-formula>":[96],"nutshell,":[99],"our":[100,250],"BAVS":[101],"is":[102,180,264],"designed":[103],"eliminate":[105],"interference":[107],"of":[108,249],"or":[111],"establishing":[117],"correspondences":[120],"explicit":[123],"manner.":[124],"first":[127],"stage,":[128,187],"employ":[130],"model":[133,157,171],"localize":[135,197],"potential":[136],"objects":[138,226],"from":[139],"data":[141],"without":[142],"being":[143],"affected":[144],"contaminated":[146],"signals.":[148],"Meanwhile,":[149],"also":[151],"utilize":[152],"classification":[156],"discern":[159],"semantics.":[161],"Considering":[162],"tags":[165,179,230],"provided":[166],"are":[172],"noisy,":[173],"associating":[174],"object":[175,215],"masks":[176],"with":[177],"not":[181],"trivial.":[182],"Thus,":[183],"second":[186],"develop":[189],"integration":[193],"strategy":[194],"(AVIS)":[195],"authentic-sounding":[199],"objects.":[200,243],"Here,":[201],"construct":[203],"tree":[206],"based":[207],"hierarchical":[210],"correspondence":[211],"categories.":[216],"We":[217],"then":[218],"examine":[219],"label":[221],"concurrency":[222],"localized":[225],"classified":[228],"tracing":[232],"tree.":[235],"With":[236],"AVIS,":[237],"can":[239],"effectively":[240],"segment":[241],"real-sounding":[242],"Extensive":[244],"experiments":[245],"demonstrate":[246],"superiority":[248],"method":[251],"datasets,":[254],"particularly":[255],"scenarios":[257],"involving":[258],"noise.":[260],"Our":[261],"project":[262],"website":[263],"<uri":[265],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://yenanliu.github.io/AVSS.github.io/</uri>":[267],".":[268]},"counts_by_year":[{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
