{"id":"https://openalex.org/W4392904417","doi":"https://doi.org/10.1109/icassp48485.2024.10447949","title":"Cross Pseudo-Labeling for Semi-Supervised Audio-Visual Source Localization","display_name":"Cross Pseudo-Labeling for Semi-Supervised Audio-Visual Source Localization","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904417","doi":"https://doi.org/10.1109/icassp48485.2024.10447949"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447949","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447949","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112724309","display_name":"Yuxin Guo","orcid":"https://orcid.org/0009-0008-5036-2575"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuxin Guo","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences,State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences","State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences, School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences,State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210112150"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences, School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100760815","display_name":"Shijie Ma","orcid":"https://orcid.org/0009-0005-1131-5686"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shijie Ma","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences,State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences","State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences, School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences,State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210112150"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences, School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064909561","display_name":"Yuhao Zhao","orcid":"https://orcid.org/0009-0009-3802-8694"},"institutions":[{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhao Zhao","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences,State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences","State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences, School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences,State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210112150"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences, School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050531685","display_name":"Hu Su","orcid":"https://orcid.org/0000-0002-0551-3193"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hu Su","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences,State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences","State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences, School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences,State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210112150"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences, School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108286207","display_name":"Wei Zou","orcid":"https://orcid.org/0000-0003-4215-5361"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Zou","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences,State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences","State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences, School of Artificial Intelligence, University of Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences,State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210112150"]},{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation of Chinese Academy of Sciences, School of Artificial Intelligence, University of Chinese Academy of Sciences","institution_ids":["https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5112724309"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210100255","https://openalex.org/I4210112150"],"apc_list":null,"apc_paid":null,"fwci":0.7501,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.65376379,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"8356","last_page":"8360"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8459004163742065},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6097663044929504},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5850580930709839},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5369128584861755},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5326380729675293},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.4575966000556946},{"id":"https://openalex.org/keywords/sharpening","display_name":"Sharpening","score":0.4406340420246124},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4381277561187744},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.4318193197250366},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3844895660877228},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3615349233150482},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3391783833503723},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.15078213810920715}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8459004163742065},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6097663044929504},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5850580930709839},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5369128584861755},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5326380729675293},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4575966000556946},{"id":"https://openalex.org/C2781137444","wikidata":"https://www.wikidata.org/wiki/Q237105","display_name":"Sharpening","level":2,"score":0.4406340420246124},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4381277561187744},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.4318193197250366},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3844895660877228},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3615349233150482},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3391783833503723},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.15078213810920715},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447949","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447949","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2526050071","https://openalex.org/W2963680395","https://openalex.org/W2982619606","https://openalex.org/W2986131686","https://openalex.org/W3015371781","https://openalex.org/W3035682985","https://openalex.org/W3091002423","https://openalex.org/W3105352633","https://openalex.org/W3108332675","https://openalex.org/W3108367559","https://openalex.org/W3170088426","https://openalex.org/W4200436929","https://openalex.org/W4212847156","https://openalex.org/W4221167476","https://openalex.org/W4224925617","https://openalex.org/W4283709432","https://openalex.org/W4293665662","https://openalex.org/W4296594718","https://openalex.org/W4312498304","https://openalex.org/W4312653918","https://openalex.org/W4312926266","https://openalex.org/W4372267673","https://openalex.org/W4386072368","https://openalex.org/W4386075532","https://openalex.org/W6729831399","https://openalex.org/W6783539077","https://openalex.org/W6843326755","https://openalex.org/W6862498849"],"related_works":["https://openalex.org/W2329932281","https://openalex.org/W64535957","https://openalex.org/W2043790407","https://openalex.org/W2348902545","https://openalex.org/W2321247741","https://openalex.org/W2136313643","https://openalex.org/W2087564795","https://openalex.org/W2011367085","https://openalex.org/W2381722214","https://openalex.org/W2292822667"],"abstract_inverted_index":{"Audio-Visual":[0],"Source":[1],"Localization":[2],"(AVSL)":[3],"is":[4],"the":[5,13,30,60,75,97],"task":[6],"of":[7],"identifying":[8],"specific":[9],"sounding":[10],"objects":[11],"in":[12],"scene":[14],"given":[15],"audio":[16],"cues.":[17],"In":[18],"our":[19],"work,":[20],"we":[21,43],"focus":[22],"on":[23],"semi-supervised":[24],"AVSL":[25],"with":[26,32,59,70,78,105],"pseudo-labeling.":[27],"To":[28],"address":[29],"issues":[31],"vanilla":[33],"hard":[34],"pseudo-labels":[35,77,104],"including":[36],"bias":[37,65,130],"accumulation,":[38],"noise":[39],"sensitivity,":[40],"and":[41,80,92,131],"instability,":[42],"propose":[44],"a":[45],"novel":[46],"method":[47],"named":[48],"Cross":[49],"Pseudo-Labeling":[50],"(XPL),":[51],"wherein":[52],"two":[53,71],"models":[54,87],"learn":[55],"from":[56],"each":[57],"other":[58],"cross-refine":[61],"mechanism":[62],"to":[63,88,110],"avoid":[64],"accumulation.":[66],"We":[67],"equip":[68],"XPL":[69,118],"effective":[72],"components.":[73],"Firstly,":[74],"soft":[76],"sharpening":[79],"pseudolabel":[81],"exponential":[82],"moving":[83],"average":[84],"mechanisms":[85],"enable":[86],"achieve":[89],"gradual":[90],"self-improvement":[91],"ensure":[93],"stable":[94],"training.":[95],"Secondly,":[96],"curriculum":[98],"data":[99],"selection":[100],"module":[101],"adaptively":[102],"selects":[103],"high":[106],"quality":[107],"during":[108],"training":[109,133],"mitigate":[111],"potential":[112],"bias.":[113],"Experimental":[114],"results":[115],"demonstrate":[116],"that":[117],"significantly":[119],"outperforms":[120],"existing":[121],"methods,":[122],"achieving":[123],"state-of-the-art":[124],"performance":[125],"while":[126],"effectively":[127],"mitigating":[128],"confirmation":[129],"ensuring":[132],"stability.":[134]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
