{"id":"https://openalex.org/W3162036982","doi":"https://doi.org/10.1109/taslp.2021.3079809","title":"Indoor Multi-Speaker Localization Based on Bayesian Nonparametrics in the Circular Harmonic Domain","display_name":"Indoor Multi-Speaker Localization Based on Bayesian Nonparametrics in the Circular Harmonic Domain","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3162036982","doi":"https://doi.org/10.1109/taslp.2021.3079809","mag":"3162036982"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2021.3079809","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3079809","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015242515","display_name":"Kunkun SongGong","orcid":"https://orcid.org/0000-0002-6549-5046"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kunkun SongGong","raw_affiliation_strings":["College of Electronic and Information Engineering, Nanjing University of Aeronautics and Astronautics, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Nanjing University of Aeronautics and Astronautics, Nanjing, China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100704485","display_name":"Huawei Chen","orcid":"https://orcid.org/0000-0002-5020-3012"},"institutions":[{"id":"https://openalex.org/I9842412","display_name":"Nanjing University of Aeronautics and Astronautics","ror":"https://ror.org/01scyh794","country_code":"CN","type":"education","lineage":["https://openalex.org/I9842412"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huawei Chen","raw_affiliation_strings":["College of Electronic and Information Engineering, Nanjing University of Aeronautics and Astronautics, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"College of Electronic and Information Engineering, Nanjing University of Aeronautics and Astronautics, Nanjing, China","institution_ids":["https://openalex.org/I9842412"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100676721","display_name":"Wenwu Wang","orcid":"https://orcid.org/0000-0002-8393-5703"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wenwu Wang","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford GU2 7XH, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford GU2 7XH, U.K","institution_ids":["https://openalex.org/I28290843"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5015242515"],"corresponding_institution_ids":["https://openalex.org/I9842412"],"apc_list":null,"apc_paid":null,"fwci":1.2189,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.78566599,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"29","issue":null,"first_page":"1864","last_page":"1880"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.6538661122322083},{"id":"https://openalex.org/keywords/microphone-array","display_name":"Microphone array","score":0.6365455389022827},{"id":"https://openalex.org/keywords/beamforming","display_name":"Beamforming","score":0.6303318738937378},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6283900737762451},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.5016229152679443},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.4848712384700775},{"id":"https://openalex.org/keywords/acoustic-source-localization","display_name":"Acoustic source localization","score":0.46606332063674927},{"id":"https://openalex.org/keywords/direction-of-arrival","display_name":"Direction of arrival","score":0.46451324224472046},{"id":"https://openalex.org/keywords/frequency-domain","display_name":"Frequency domain","score":0.4377608299255371},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.385846883058548},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3595556318759918},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.14060673117637634},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.12830692529678345},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11760246753692627},{"id":"https://openalex.org/keywords/acoustic-wave","display_name":"Acoustic wave","score":0.0853344202041626}],"concepts":[{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.6538661122322083},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.6365455389022827},{"id":"https://openalex.org/C54197355","wikidata":"https://www.wikidata.org/wiki/Q5782992","display_name":"Beamforming","level":2,"score":0.6303318738937378},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6283900737762451},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.5016229152679443},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.4848712384700775},{"id":"https://openalex.org/C93240960","wikidata":"https://www.wikidata.org/wiki/Q217270","display_name":"Acoustic source localization","level":3,"score":0.46606332063674927},{"id":"https://openalex.org/C172051844","wikidata":"https://www.wikidata.org/wiki/Q5280438","display_name":"Direction of arrival","level":3,"score":0.46451324224472046},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.4377608299255371},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.385846883058548},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3595556318759918},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.14060673117637634},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.12830692529678345},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11760246753692627},{"id":"https://openalex.org/C204723758","wikidata":"https://www.wikidata.org/wiki/Q3882459","display_name":"Acoustic wave","level":2,"score":0.0853344202041626},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C21822782","wikidata":"https://www.wikidata.org/wiki/Q131214","display_name":"Antenna (radio)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2021.3079809","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3079809","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:alma.44SUR_INST:11149539800002346","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4210197018","display_name":"View","issn_l":"2688-268X","issn":["2688-268X","2688-3988"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5261870401","display_name":null,"funder_award_id":"61971219","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6165064650","display_name":null,"funder_award_id":"SKLA202015","funder_id":"https://openalex.org/F4320338098","funder_display_name":"Anhui Academy of Recycling Economical and Technical Engineering, Chinese Academy of Sciences"},{"id":"https://openalex.org/G6769886567","display_name":null,"funder_award_id":"61471190","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335675","display_name":"State Key Laboratory of Acoustics","ror":null},{"id":"https://openalex.org/F4320338098","display_name":"Anhui Academy of Recycling Economical and Technical Engineering, Chinese Academy of Sciences","ror":"https://ror.org/01yy74248"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W962432006","https://openalex.org/W1536644969","https://openalex.org/W1605972624","https://openalex.org/W1966081805","https://openalex.org/W1969299255","https://openalex.org/W1974387177","https://openalex.org/W1979951482","https://openalex.org/W1987906574","https://openalex.org/W1998428434","https://openalex.org/W2008692090","https://openalex.org/W2011235948","https://openalex.org/W2022637486","https://openalex.org/W2023254078","https://openalex.org/W2046317813","https://openalex.org/W2060108923","https://openalex.org/W2074134755","https://openalex.org/W2080220804","https://openalex.org/W2101616935","https://openalex.org/W2108100421","https://openalex.org/W2113638573","https://openalex.org/W2115870554","https://openalex.org/W2117678320","https://openalex.org/W2122041143","https://openalex.org/W2128131274","https://openalex.org/W2136775245","https://openalex.org/W2149648624","https://openalex.org/W2154604839","https://openalex.org/W2161349621","https://openalex.org/W2163994287","https://openalex.org/W2170393302","https://openalex.org/W2331866809","https://openalex.org/W2524249505","https://openalex.org/W2560973751","https://openalex.org/W2607152227","https://openalex.org/W2728130761","https://openalex.org/W2745195618","https://openalex.org/W2776323726","https://openalex.org/W2871284238","https://openalex.org/W2900193606","https://openalex.org/W2986631036","https://openalex.org/W3104947433","https://openalex.org/W3127686677","https://openalex.org/W4229751704","https://openalex.org/W6655956378","https://openalex.org/W6679947693","https://openalex.org/W6682325752","https://openalex.org/W6917638038"],"related_works":["https://openalex.org/W1562475690","https://openalex.org/W1488529827","https://openalex.org/W4389082013","https://openalex.org/W1879255185","https://openalex.org/W2120442551","https://openalex.org/W2769861442","https://openalex.org/W1980506188","https://openalex.org/W2900122540","https://openalex.org/W4240587264","https://openalex.org/W2043735059"],"abstract_inverted_index":{"Circular":[0],"microphone":[1],"arrays":[2],"have":[3],"been":[4],"used":[5],"for":[6,14,28],"multi-speaker":[7,105],"localization":[8,34,91,106],"in":[9,18,65,97,108,150,190,251,259],"computational":[10],"auditory":[11],"scene":[12],"analysis,":[13,21],"their":[15],"high":[16],"flexibility":[17],"sound":[19],"field":[20],"including":[22],"the":[23,33,50,60,75,83,89,109,115,122,131,136,151,163,171,174,178,182,191,214,262,267,271],"generation":[24],"of":[25,36,56,62,77,88,146,173,184,193,216,254,261,266],"frequency-invariant":[26],"eigenbeams":[27],"wideband":[29],"acoustic":[30,66,116],"sources.":[31,185],"However,":[32],"performance":[35,84],"existing":[37],"circular":[38,43,110],"harmonic":[39,111],"approaches,":[40],"such":[41],"as":[42,54,69],"harmonics":[44],"beamformer":[45],"(CHB)":[46],"depends":[47],"strongly":[48],"on":[49,114,162,213,233],"physical":[51],"characteristics":[52],"(such":[53,68],"shape)":[55],"sensor":[57],"arrays,":[58],"and":[59,74,121,155,196,210,223,236,256,270],"level":[61],"uncertainties":[63,80],"presented":[64],"environments":[67],"background":[70,197],"noise,":[71],"room":[72,194],"reverberation,":[73],"number":[76,183],"sources).":[78],"These":[79,186],"may":[81,188],"limit":[82],"or":[85],"practical":[86],"application":[87],"speaker":[90],"algorithms.":[92],"To":[93,199],"address":[94,200],"these":[95],"issues,":[96],"this":[98,201],"paper,":[99],"we":[100,129,203],"present":[101],"a":[102,158,205,252],"new":[103],"indoor":[104],"method":[107,212,241],"domain":[112],"based":[113,161],"holography":[117],"beamforming":[118,138],"(AHB)":[119],"technique":[120],"Bayesian":[123],"nonparametrics":[124],"(BNP)":[125],"method.":[126],"More":[127],"specifically,":[128],"use":[130],"AHB":[132],"technique,":[133],"which":[134],"combines":[135],"delay-and-sum":[137],"with":[139],"acoustic-holography-based":[140],"virtual":[141],"sensing,":[142],"to":[143,169],"generate":[144],"direction":[145],"arrival":[147],"(DOA)":[148],"measurements":[149],"time-frequency":[152],"(TF)":[153],"domain,":[154],"then":[156],"design":[157],"BNP":[159],"algorithm":[160],"infinite":[164],"Gaussian":[165],"mixture":[166,217],"model":[167],"(IGMM)":[168],"estimate":[170],"DOAs":[172],"individual":[175],"sources":[176],"without":[177],"prior":[179],"knowledge":[180],"about":[181],"estimates":[187],"degrade":[189],"presence":[192],"reverberation":[195,257],"noise.":[198],"issue,":[202],"develop":[204],"robust":[206],"TF":[207,229],"bin":[208],"selection":[209],"permutation":[211],"basis":[215],"weights,":[218],"using":[219],"power,":[220],"power":[221],"ratio":[222],"local":[224],"variance":[225],"estimated":[226],"at":[227],"each":[228],"bin.":[230],"Experiments":[231],"performed":[232],"both":[234],"simulated":[235],"real-data":[237],"show":[238],"that":[239],"our":[240],"gives":[242],"significantly":[243],"better":[244],"performance,":[245],"than":[246],"four":[247],"recent":[248],"baseline":[249],"methods,":[250],"variety":[253],"noise":[255],"levels,":[258],"terms":[260],"root-mean-square":[263],"error":[264],"(RMSE)":[265],"DOA":[268],"estimation":[269],"source":[272],"detecting":[273],"success":[274],"rate.":[275]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
