{"id":"https://openalex.org/W4408353031","doi":"https://doi.org/10.1109/icassp49660.2025.10890536","title":"The Importance of Spatial and Spectral Information in Multiple Speaker Tracking","display_name":"The Importance of Spatial and Spectral Information in Multiple Speaker Tracking","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408353031","doi":"https://doi.org/10.1109/icassp49660.2025.10890536"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10890536","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890536","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071964843","display_name":"Hanan Beit-On","orcid":"https://orcid.org/0000-0002-5300-2339"},"institutions":[{"id":"https://openalex.org/I124227911","display_name":"Ben-Gurion University of the Negev","ror":"https://ror.org/05tkyf982","country_code":"IL","type":"education","lineage":["https://openalex.org/I124227911"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Hanan Beit-On","raw_affiliation_strings":["Ben-Gurion University of the Negev,School of Electrical and Computer Engineering,Be&#x2019;er-Sheva,Israel"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ben-Gurion University of the Negev,School of Electrical and Computer Engineering,Be&#x2019;er-Sheva,Israel","institution_ids":["https://openalex.org/I124227911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026575041","display_name":"Vladimir Tourbabin","orcid":"https://orcid.org/0000-0003-2536-5666"},"institutions":[{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vladimir Tourbabin","raw_affiliation_strings":["Reality Labs Research Meta,Redmond, Seattle,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Reality Labs Research Meta,Redmond, Seattle,USA","institution_ids":["https://openalex.org/I58610484"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019518606","display_name":"Boaz Rafaely","orcid":"https://orcid.org/0000-0002-6819-9250"},"institutions":[{"id":"https://openalex.org/I124227911","display_name":"Ben-Gurion University of the Negev","ror":"https://ror.org/05tkyf982","country_code":"IL","type":"education","lineage":["https://openalex.org/I124227911"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Boaz Rafaely","raw_affiliation_strings":["Ben-Gurion University of the Negev,School of Electrical and Computer Engineering,Be&#x2019;er-Sheva,Israel"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ben-Gurion University of the Negev,School of Electrical and Computer Engineering,Be&#x2019;er-Sheva,Israel","institution_ids":["https://openalex.org/I124227911"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03794271,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.90420001745224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6730153560638428},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.5247045755386353},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3827924132347107},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38093191385269165},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.05698674917221069}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6730153560638428},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.5247045755386353},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3827924132347107},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38093191385269165},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.05698674917221069},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10890536","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890536","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1966081805","https://openalex.org/W2031744243","https://openalex.org/W2050551142","https://openalex.org/W2090629256","https://openalex.org/W2101206777","https://openalex.org/W2566886590","https://openalex.org/W2897977894","https://openalex.org/W2963846200","https://openalex.org/W3015191643","https://openalex.org/W3021261768","https://openalex.org/W3095838395","https://openalex.org/W3132830522","https://openalex.org/W3163652268","https://openalex.org/W3163881933","https://openalex.org/W3167533889","https://openalex.org/W3206329344","https://openalex.org/W4211164258","https://openalex.org/W4213377681","https://openalex.org/W4224934174","https://openalex.org/W6746735323"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Multi-speaker":[0],"localization":[1],"and":[2],"tracking":[3,23],"using":[4,116],"microphone":[5],"array":[6],"recording":[7],"is":[8,24,54,78],"of":[9,15,18,50],"importance":[10],"in":[11,91,120],"a":[12,62],"wide":[13],"range":[14],"applications.":[16],"One":[17],"the":[19,30,92,101,107,111,121],"challenges":[20],"with":[21,29],"multi-speaker":[22],"to":[25,45],"associate":[26],"direction":[27],"estimates":[28],"correct":[31],"speaker.":[32],"Most":[33],"existing":[34],"association":[35,66,71,93],"approaches":[36],"rely":[37],"on":[38,73,88,104],"spatial":[39],"or":[40,57],"spectral":[41,89],"information":[42,52,119],"alone,":[43],"leading":[44],"performance":[46,113],"degradation":[47],"when":[48],"one":[49],"these":[51],"channels":[53],"partially":[55],"known":[56],"missing.":[58],"This":[59,77],"paper":[60],"studies":[61],"joint":[63,74,117],"probability":[64],"data":[65],"(JPDA)-based":[67],"method":[68,103],"that":[69,99],"facilitates":[70],"based":[72,87],"spatial-spectral":[75,118],"information.":[76],"achieved":[79],"by":[80,115],"integrating":[81],"speaker":[82],"time-frequency":[83],"(TF)":[84],"masks,":[85],"estimated":[86],"information,":[90],"probabilities":[94],"calculation.":[95],"An":[96],"experimental":[97],"study":[98],"tested":[100],"proposed":[102],"recordings":[105],"from":[106],"LOCATA":[108],"challenge":[109],"demonstrates":[110],"enhanced":[112],"obtained":[114],"association.":[122]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
