{"id":"https://openalex.org/W4408355192","doi":"https://doi.org/10.1109/icassp49660.2025.10890048","title":"Neural Ambisonic Encoding For Multi-Speaker Scenarios Using A Circular Microphone Array","display_name":"Neural Ambisonic Encoding For Multi-Speaker Scenarios Using A Circular Microphone Array","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408355192","doi":"https://doi.org/10.1109/icassp49660.2025.10890048"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10890048","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890048","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011568914","display_name":"Yue Qiao","orcid":"https://orcid.org/0000-0003-1095-3115"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yue Qiao","raw_affiliation_strings":["3D3A Lab, Princeton University,Princeton,USA"],"affiliations":[{"raw_affiliation_string":"3D3A Lab, Princeton University,Princeton,USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047630225","display_name":"Vinay Kothapally","orcid":"https://orcid.org/0000-0002-2111-3333"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vinay Kothapally","raw_affiliation_strings":["Tencent AI Lab,Bellevue,USA"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Bellevue,USA","institution_ids":["https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106407019","display_name":"Meng Yu","orcid":"https://orcid.org/0000-0002-0031-9156"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Meng Yu","raw_affiliation_strings":["Tencent AI Lab,Bellevue,USA"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Bellevue,USA","institution_ids":["https://openalex.org/I4210108985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Tencent AI Lab,Bellevue,USA"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab,Bellevue,USA","institution_ids":["https://openalex.org/I4210108985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5011568914"],"corresponding_institution_ids":["https://openalex.org/I20089843"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0561278,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ambisonics","display_name":"Ambisonics","score":0.8212672472000122},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6893764734268188},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.6662453413009644},{"id":"https://openalex.org/keywords/microphone-array","display_name":"Microphone array","score":0.6661598682403564},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.5031208395957947},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.49832677841186523},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.33222496509552},{"id":"https://openalex.org/keywords/loudspeaker","display_name":"Loudspeaker","score":0.17082494497299194},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.16375526785850525},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09982907772064209},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09880587458610535}],"concepts":[{"id":"https://openalex.org/C47726159","wikidata":"https://www.wikidata.org/wiki/Q457547","display_name":"Ambisonics","level":3,"score":0.8212672472000122},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6893764734268188},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.6662453413009644},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.6661598682403564},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.5031208395957947},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.49832677841186523},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.33222496509552},{"id":"https://openalex.org/C157138929","wikidata":"https://www.wikidata.org/wiki/Q570","display_name":"Loudspeaker","level":2,"score":0.17082494497299194},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.16375526785850525},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09982907772064209},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09880587458610535},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10890048","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890048","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.5699999928474426,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W1989337816","https://openalex.org/W2029671051","https://openalex.org/W2037949455","https://openalex.org/W2056212252","https://openalex.org/W2494085323","https://openalex.org/W2772289775","https://openalex.org/W2937949426","https://openalex.org/W2964058413","https://openalex.org/W3163912061","https://openalex.org/W3177116139","https://openalex.org/W4210774825","https://openalex.org/W4214652421","https://openalex.org/W4285294643","https://openalex.org/W4312384165","https://openalex.org/W4322731305","https://openalex.org/W4380992803","https://openalex.org/W4385245566","https://openalex.org/W4391305464","https://openalex.org/W4391621214","https://openalex.org/W4392341719","https://openalex.org/W4392902609","https://openalex.org/W4392903617","https://openalex.org/W4401610848","https://openalex.org/W6659849957","https://openalex.org/W6754782314","https://openalex.org/W6757798589","https://openalex.org/W6763684417","https://openalex.org/W6765415151","https://openalex.org/W6840332670","https://openalex.org/W7046848240"],"related_works":["https://openalex.org/W1571600724","https://openalex.org/W2897160306","https://openalex.org/W2604047122","https://openalex.org/W107154053","https://openalex.org/W1879255185","https://openalex.org/W2120442551","https://openalex.org/W2769861442","https://openalex.org/W1980506188","https://openalex.org/W2900122540","https://openalex.org/W4240587264"],"abstract_inverted_index":{"Spatial":[0],"audio":[1,140],"formats":[2],"like":[3],"Ambisonics":[4,61],"are":[5,144],"playback":[6],"device":[7],"layout-agnostic":[8],"and":[9,16,88,111,124,131,134],"well-suited":[10],"for":[11,29,53],"applications":[12],"such":[13],"as":[14],"teleconferencing":[15],"virtual":[17],"reality.":[18],"Conventional":[19],"Ambisonic":[20,86],"encoding":[21,54,99],"methods":[22],"often":[23],"rely":[24],"on":[25,76,108],"spherical":[26],"microphone":[27,56],"arrays":[28],"efficient":[30],"sound":[31],"field":[32],"capture,":[33],"which":[34],"limits":[35],"their":[36],"flexibility":[37],"in":[38,63],"practical":[39],"scenarios.":[40],"We":[41],"propose":[42],"a":[43,49,71,90,103],"deep":[44],"learning":[45],"(DL)-based":[46],"approach,":[47],"leveraging":[48],"two-stage":[50],"network":[51],"architecture":[52],"circular":[55,105],"array":[57],"signals":[58],"into":[59],"second-order":[60],"(SOA)":[62],"multi-speaker":[64],"environments.":[65],"In":[66],"addition,":[67],"we":[68],"introduce:":[69],"(i)":[70],"novel":[72],"loss":[73],"function":[74],"based":[75],"spatial":[77,132],"power":[78],"maps":[79],"to":[80,94],"regularize":[81],"inter-channel":[82],"correlations":[83],"of":[84,98],"the":[85,96],"signals,":[87],"(ii)":[89],"channel":[91],"permutation":[92],"technique":[93],"resolve":[95],"ambiguity":[97],"vertical":[100],"information":[101],"using":[102],"horizontal":[104],"array.":[106],"Evaluation":[107],"simulated":[109],"speech":[110],"noise":[112],"datasets":[113],"shows":[114],"that":[115],"our":[116],"approach":[117],"consistently":[118],"outperforms":[119],"traditional":[120],"signal":[121],"processing":[122],"(SP)":[123],"DL-based":[125],"methods,":[126],"providing":[127],"significantly":[128],"better":[129],"timbral":[130],"quality":[133],"higher":[135],"source":[136],"localization":[137],"accuracy.":[138],"Binaural":[139],"demos":[141],"with":[142],"visualizations":[143],"available":[145],"at":[146],"https://bridgoon97.github.io/NeuralAmbisonicEncoding/.":[147]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
