{"id":"https://openalex.org/W4416798674","doi":"https://doi.org/10.1109/apsipaasc65261.2025.11249412","title":"Visually-Informed Multichannel Sound Source Separation Based on 3D Gaussian Primitives","display_name":"Visually-Informed Multichannel Sound Source Separation Based on 3D Gaussian Primitives","publication_year":2025,"publication_date":"2025-10-22","ids":{"openalex":"https://openalex.org/W4416798674","doi":"https://doi.org/10.1109/apsipaasc65261.2025.11249412"},"language":null,"primary_location":{"id":"doi:10.1109/apsipaasc65261.2025.11249412","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc65261.2025.11249412","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5105697304","display_name":"H. Asano","orcid":null},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Haruaki Asano","raw_affiliation_strings":["Graduate School of Engineering, Kyoto University,Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, Kyoto University,Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120437451","display_name":"Ryunosuke Nihei","orcid":null},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryunosuke Nihei","raw_affiliation_strings":["Graduate School of Engineering, Kyoto University,Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, Kyoto University,Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007353694","display_name":"Yoshiaki Bando","orcid":"https://orcid.org/0000-0002-3934-0745"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshiaki Bando","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST),Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST),Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082272880","display_name":"Aditya Arie Nugraha","orcid":"https://orcid.org/0000-0001-5424-747X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aditya Arie Nugraha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014095339","display_name":"Diego Di Carlo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Diego Di Carlo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077739319","display_name":"Hiroyuki Ueda","orcid":"https://orcid.org/0000-0002-3938-2208"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroyuki Ueda","raw_affiliation_strings":["Graduate School of Engineering, Kyoto University,Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, Kyoto University,Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028906527","display_name":"Yosuke Ito","orcid":"https://orcid.org/0009-0009-4257-7530"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yosuke Ito","raw_affiliation_strings":["Graduate School of Engineering, Kyoto University,Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, Kyoto University,Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067956319","display_name":"Kazuyoshi Yoshii","orcid":"https://orcid.org/0000-0001-8387-8609"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazuyoshi Yoshii","raw_affiliation_strings":["Graduate School of Engineering, Kyoto University,Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Engineering, Kyoto University,Japan","institution_ids":["https://openalex.org/I22299242"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5105697304"],"corresponding_institution_ids":["https://openalex.org/I22299242"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.47829738,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"36","last_page":"41"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.944599986076355,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.944599986076355,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.01720000058412552,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.007799999788403511,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/source-separation","display_name":"Source separation","score":0.6309999823570251},{"id":"https://openalex.org/keywords/non-negative-matrix-factorization","display_name":"Non-negative matrix factorization","score":0.5788999795913696},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5634999871253967},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5414999723434448},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5153999924659729},{"id":"https://openalex.org/keywords/blind-signal-separation","display_name":"Blind signal separation","score":0.5023999810218811},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.4677000045776367},{"id":"https://openalex.org/keywords/maxima-and-minima","display_name":"Maxima and minima","score":0.44760000705718994},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.4397999942302704},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.3970000147819519}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.704800009727478},{"id":"https://openalex.org/C2776864781","wikidata":"https://www.wikidata.org/wiki/Q52617913","display_name":"Source separation","level":2,"score":0.6309999823570251},{"id":"https://openalex.org/C152671427","wikidata":"https://www.wikidata.org/wiki/Q10843505","display_name":"Non-negative matrix factorization","level":4,"score":0.5788999795913696},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5634999871253967},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5414999723434448},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5153999924659729},{"id":"https://openalex.org/C120317606","wikidata":"https://www.wikidata.org/wiki/Q17105967","display_name":"Blind signal separation","level":3,"score":0.5023999810218811},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.4677000045776367},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4636000096797943},{"id":"https://openalex.org/C186633575","wikidata":"https://www.wikidata.org/wiki/Q845060","display_name":"Maxima and minima","level":2,"score":0.44760000705718994},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4449999928474426},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.4397999942302704},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.42329999804496765},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3970000147819519},{"id":"https://openalex.org/C185142706","wikidata":"https://www.wikidata.org/wiki/Q1134404","display_name":"Covariance matrix","level":2,"score":0.39660000801086426},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.39239999651908875},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.3878999948501587},{"id":"https://openalex.org/C178650346","wikidata":"https://www.wikidata.org/wiki/Q201984","display_name":"Covariance","level":2,"score":0.3804999887943268},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3725999891757965},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.34380000829696655},{"id":"https://openalex.org/C51432778","wikidata":"https://www.wikidata.org/wiki/Q1259145","display_name":"Independent component analysis","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.32280001044273376},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.3131999969482422},{"id":"https://openalex.org/C93240960","wikidata":"https://www.wikidata.org/wiki/Q217270","display_name":"Acoustic source localization","level":3,"score":0.3102000057697296},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.30230000615119934},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C4199805","wikidata":"https://www.wikidata.org/wiki/Q2725903","display_name":"Gaussian noise","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2912999987602234},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.28349998593330383},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.2590999901294708},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C166550679","wikidata":"https://www.wikidata.org/wiki/Q263400","display_name":"Gaussian network model","level":3,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc65261.2025.11249412","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc65261.2025.11249412","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1845880232","https://openalex.org/W1974647903","https://openalex.org/W1996355918","https://openalex.org/W2113990625","https://openalex.org/W2127851351","https://openalex.org/W2141884927","https://openalex.org/W2143027228","https://openalex.org/W2168273590","https://openalex.org/W2412956798","https://openalex.org/W2763188033","https://openalex.org/W2803322398","https://openalex.org/W2937284863","https://openalex.org/W2964208065","https://openalex.org/W3081267827","https://openalex.org/W3109585842","https://openalex.org/W3212886388","https://openalex.org/W4225316950","https://openalex.org/W4293363567","https://openalex.org/W4362653188","https://openalex.org/W4372259962","https://openalex.org/W4376644566","https://openalex.org/W4385318467","https://openalex.org/W4403126470","https://openalex.org/W4404002612","https://openalex.org/W4408355408","https://openalex.org/W4413156234"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"visually-informed":[3],"sound":[4,92],"source":[5,42,93,135],"separation":[6,43],"for":[7,198],"audio-visual":[8,200],"understanding":[9],"of":[10,26,39,59,76,86,98,111,133,154,178],"indoor":[11],"scenes":[12],"captured":[13],"by":[14,129],"distributed":[15],"microphone":[16],"arrays":[17],"and":[18,53],"cameras.":[19],"Our":[20,78],"approach":[21],"leverages":[22],"the":[23,56,69,84,131,147,162,176,179,187],"3D":[24,31,87,163,172,192],"information":[25],"sound-emitting":[27],"objects,":[28],"reconstructed":[29],"via":[30],"Gaussian":[32,164],"splatting":[33],"(3DGS),":[34],"to":[35,63,68,82,114,190],"overcome":[36],"a":[37,90,96,143,151,195],"limitation":[38],"modern":[40],"blind":[41],"methods":[44],"like":[45],"multichannel":[46],"nonnegative":[47],"matrix":[48],"factorization":[49],"(MNMF).":[50],"While":[51],"adaptable":[52],"potentially":[54],"performant,":[55],"iterative":[57],"optimization":[58],"MNMF":[60,128],"often":[61],"converges":[62],"poor":[64],"local":[65],"minima":[66],"due":[67],"highly-expressive":[70],"full-rank":[71],"spatial":[72],"covariance":[73],"matrices":[74],"(SCMs)":[75],"sources.":[77],"key":[79],"idea":[80],"is":[81,186],"treat":[83],"set":[85],"Gaussians":[88,193],"representing":[89],"sizable":[91],"object":[94,136],"as":[95,194],"collection":[97],"sub-sources":[99],"that":[100,145],"share":[101],"an":[102,119],"audio":[103],"signal":[104],"but":[105],"have":[106],"unique":[107],"emission":[108],"weights,":[109],"both":[110],"which":[112,157],"are":[113,158],"be":[115],"estimated":[116],"jointly":[117],"from":[118,161],"observed":[120],"mixture.":[121],"To":[122,182],"enforce":[123],"this":[124,185],"structure,":[125],"we":[126,141],"guide":[127],"regularizing":[130],"SCM":[132,148],"each":[134,138],"at":[137],"frequency.":[139],"Specifically,":[140],"use":[142,191],"prior":[144],"centers":[146],"estimate":[149],"around":[150],"weighted":[152],"sum":[153],"theoretical":[155],"SCMs,":[156],"analytically":[159],"derived":[160],"positions.":[165],"Experiments":[166],"with":[167],"simulated":[168],"data,":[169],"featuring":[170],"two":[171],"human":[173],"models,":[174],"demonstrated":[175],"effectiveness":[177],"proposed":[180],"method.":[181],"our":[183],"knowledge,":[184],"first":[188],"work":[189],"common":[196],"primitive":[197],"joint":[199],"analysis.":[201]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-28T00:00:00"}
