{"id":"https://openalex.org/W2769321409","doi":"https://doi.org/10.1109/tmm.2017.2777671","title":"Multiple Speaker Tracking in Spatial Audio via PHD Filtering and Depth-Audio Fusion","display_name":"Multiple Speaker Tracking in Spatial Audio via PHD Filtering and Depth-Audio Fusion","publication_year":2017,"publication_date":"2017-11-24","ids":{"openalex":"https://openalex.org/W2769321409","doi":"https://doi.org/10.1109/tmm.2017.2777671","mag":"2769321409"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2017.2777671","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2017.2777671","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004547566","display_name":"Qingju Liu","orcid":"https://orcid.org/0000-0003-0778-2992"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Qingju Liu","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"raw_orcid":"https://orcid.org/0000-0003-0778-2992","affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100676721","display_name":"Wenwu Wang","orcid":"https://orcid.org/0000-0002-8393-5703"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wenwu Wang","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"raw_orcid":"https://orcid.org/0000-0002-8393-5703","affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076202462","display_name":"Te\u00f3filo de Campos","orcid":"https://orcid.org/0000-0001-6172-0229"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Teofilo de Campos","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022165330","display_name":"Philip J. B. Jackson","orcid":"https://orcid.org/0000-0001-7933-5935"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Philip J. B. Jackson","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016376222","display_name":"Adrian Hilton","orcid":"https://orcid.org/0000-0003-4223-238X"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Adrian Hilton","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5004547566"],"corresponding_institution_ids":["https://openalex.org/I28290843"],"apc_list":null,"apc_paid":null,"fwci":1.4913,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.84206939,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"20","issue":"7","first_page":"1767","last_page":"1780"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8289819359779358},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6183697581291199},{"id":"https://openalex.org/keywords/audio-analyzer","display_name":"Audio analyzer","score":0.5244922041893005},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.5164985060691833},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.45303189754486084},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.4174199104309082},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3849060535430908},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.3687261939048767},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3266258239746094},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.2691458463668823}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8289819359779358},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6183697581291199},{"id":"https://openalex.org/C160372630","wikidata":"https://www.wikidata.org/wiki/Q4819855","display_name":"Audio analyzer","level":5,"score":0.5244922041893005},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.5164985060691833},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.45303189754486084},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.4174199104309082},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3849060535430908},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.3687261939048767},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3266258239746094},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.2691458463668823},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tmm.2017.2777671","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2017.2777671","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},{"id":"pmh:oai:epubs.surrey.ac.uk:844927","is_oa":false,"landing_page_url":"http://epubs.surrey.ac.uk/844927/1/Multiple%20Speaker%20Tracking%20in%20Spatial%20Audio%20via%20PHD%20Filtering%20and%20Depth-Audio%20Fusion.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400680","display_name":"Surrey Research Insight Open Access (The University of Surrey)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28290843","host_organization_name":"University of Surrey","host_organization_lineage":["https://openalex.org/I28290843"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.699999988079071,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G3034362336","display_name":null,"funder_award_id":"EP/P022529/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3186182679","display_name":null,"funder_award_id":"EP/L000539/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4179231495","display_name":null,"funder_award_id":"EP/M028321/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W1635512741","https://openalex.org/W1648602279","https://openalex.org/W1874477887","https://openalex.org/W1985621395","https://openalex.org/W1988294545","https://openalex.org/W2005181468","https://openalex.org/W2007162393","https://openalex.org/W2014787937","https://openalex.org/W2023100145","https://openalex.org/W2027807575","https://openalex.org/W2029587563","https://openalex.org/W2033573579","https://openalex.org/W2046317813","https://openalex.org/W2048435478","https://openalex.org/W2056898157","https://openalex.org/W2066832919","https://openalex.org/W2072595688","https://openalex.org/W2074527074","https://openalex.org/W2082581021","https://openalex.org/W2088725063","https://openalex.org/W2102767076","https://openalex.org/W2105934661","https://openalex.org/W2114609928","https://openalex.org/W2117088310","https://openalex.org/W2126489486","https://openalex.org/W2136986309","https://openalex.org/W2137585588","https://openalex.org/W2138414774","https://openalex.org/W2139336367","https://openalex.org/W2142511364","https://openalex.org/W2145256756","https://openalex.org/W2149900846","https://openalex.org/W2156137575","https://openalex.org/W2156924265","https://openalex.org/W2157938685","https://openalex.org/W2161435744","https://openalex.org/W2164406019","https://openalex.org/W2167529107","https://openalex.org/W2168195382","https://openalex.org/W2172156083","https://openalex.org/W2248489999","https://openalex.org/W2255129608","https://openalex.org/W2261362512","https://openalex.org/W2316138215","https://openalex.org/W2398659024","https://openalex.org/W2517955251","https://openalex.org/W2549440896","https://openalex.org/W3139565013","https://openalex.org/W3158497763","https://openalex.org/W4234263644","https://openalex.org/W6693075670","https://openalex.org/W6699662792","https://openalex.org/W6729461096"],"related_works":["https://openalex.org/W2375191981","https://openalex.org/W1603949574","https://openalex.org/W2379113420","https://openalex.org/W2604447241","https://openalex.org/W181699300","https://openalex.org/W2353318413","https://openalex.org/W2748661748","https://openalex.org/W2170815394","https://openalex.org/W2921688766","https://openalex.org/W24823704"],"abstract_inverted_index":{"In":[0],"the":[1,8,17,49,67,88,102,113,126,129,144,150,157,173,186,194,197,201,204,217,221],"object-based":[2],"spatial":[3,182],"audio":[4,9,60,90,169,175],"system,":[5],"positions":[6],"of":[7,55,128,147,149,220],"objects":[10,140],"(e.g.,":[11],"speakers/talkers":[12],"or":[13,141],"voices)":[14],"presented":[15,166],"in":[16,156,196,203,224],"sound":[18],"scene":[19],"are":[20,33,213],"required":[21],"as":[22,36],"important":[23],"metadata":[24],"attributes":[25],"for":[26],"object":[27,134],"acquisition":[28],"and":[29,44,47,53,73,116,200,227],"reproduction.":[30],"Binaural":[31],"microphones":[32],"often":[34],"used":[35,214],"a":[37,82,95,117,160],"physical":[38],"device":[39],"to":[40,45,66,112,124,143,167,177,215],"mimic":[41],"human":[42],"hearing":[43],"monitor":[46],"analyze":[48],"scene,":[50],"including":[51],"localization":[52],"tracking":[54,84],"multiple":[56],"speakers.":[57],"The":[58],"binaural":[59,89,174,198],"tracker,":[61],"however,":[62],"is":[63,109,122,135,165],"usually":[64],"prone":[65],"errors":[68,195],"caused":[69],"by":[70,86,138],"room":[71],"reverberation":[72],"background":[74],"noise.":[75],"To":[76,153],"address":[77],"this":[78],"limitation,":[79],"we":[80],"present":[81],"multimodal":[83],"method":[85,223],"fusing":[87],"with":[91],"depth":[92,96,114,151,158,187,205],"information":[93],"(from":[94],"sensor,":[97],"e.g.,":[98],"Kinect).":[99],"More":[100],"specifically,":[101],"probability":[103],"hypothesis":[104],"density":[105],"(PHD)":[106],"filtering":[107],"framework":[108],"first":[110],"applied":[111],"stream,":[115,159],"novel":[118,161],"clutter":[119],"intensity":[120],"model":[121],"proposed":[123,191,222],"improve":[125],"robustness":[127],"PHD":[130],"filter":[131],"when":[132],"an":[133],"occluded":[136],"either":[137],"other":[139],"due":[142],"limited":[145],"field":[146],"view":[148],"sensor.":[152],"compensate":[154],"misdetections":[155,202],"gap":[162],"filling":[163],"technique":[164],"map":[168],"azimuths":[170],"obtained":[171],"from":[172,185],"tracker":[176,199,206],"3D":[178],"positions,":[179],"using":[180],"speaker-dependent":[181],"constraints":[183],"learned":[184],"stream.":[188],"With":[189],"our":[190],"method,":[192],"both":[193],"can":[207],"be":[208],"significantly":[209],"reduced.":[210],"Real-room":[211],"recordings":[212],"show":[216],"improved":[218],"performance":[219],"removing":[225],"outliers":[226],"reducing":[228],"misdetections.":[229]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
