{"id":"https://openalex.org/W1608568678","doi":"https://doi.org/10.1109/icme.2015.7177497","title":"Audio informed visual speaker tracking with SMC-PHD filter","display_name":"Audio informed visual speaker tracking with SMC-PHD filter","publication_year":2015,"publication_date":"2015-06-01","ids":{"openalex":"https://openalex.org/W1608568678","doi":"https://doi.org/10.1109/icme.2015.7177497","mag":"1608568678"},"language":"en","primary_location":{"id":"doi:10.1109/icme.2015.7177497","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme.2015.7177497","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063512115","display_name":"Volkan K\u0131l\u0131\u00e7","orcid":"https://orcid.org/0000-0002-3164-1981"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Volkan Kilic","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, UK","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027243174","display_name":"Mark Barnard","orcid":null},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mark Barnard","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, UK","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100676721","display_name":"Wenwu Wang","orcid":"https://orcid.org/0000-0002-8393-5703"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Wenwu Wang","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, UK","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016376222","display_name":"Adrian Hilton","orcid":"https://orcid.org/0000-0003-4223-238X"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Adrian Hilton","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, UK","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028209738","display_name":"Josef Kittler","orcid":"https://orcid.org/0000-0002-8110-9205"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Josef Kittler","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, UK","institution_ids":["https://openalex.org/I28290843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5799,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.65713957,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10711","display_name":"Target Tracking and Data Fusion in Sensor Networks","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/particle-filter","display_name":"Particle filter","score":0.6805592179298401},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6776215434074402},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.6248749494552612},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5838812589645386},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5750017166137695},{"id":"https://openalex.org/keywords/eye-tracking","display_name":"Eye tracking","score":0.5460344552993774},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5120301842689514},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5024130344390869},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.460726261138916},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4487987756729126},{"id":"https://openalex.org/keywords/variable","display_name":"Variable (mathematics)","score":0.44468793272972107},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4234987795352936},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.4114295244216919},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1768868863582611},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.12160861492156982},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09472110867500305}],"concepts":[{"id":"https://openalex.org/C52421305","wikidata":"https://www.wikidata.org/wiki/Q1151499","display_name":"Particle filter","level":3,"score":0.6805592179298401},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6776215434074402},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.6248749494552612},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5838812589645386},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5750017166137695},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.5460344552993774},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5120301842689514},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5024130344390869},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.460726261138916},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4487987756729126},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.44468793272972107},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4234987795352936},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.4114295244216919},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1768868863582611},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.12160861492156982},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09472110867500305},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icme.2015.7177497","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme.2015.7177497","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.kingston.ac.uk:43474","is_oa":false,"landing_page_url":"http://eprints.kingston.ac.uk/id/eprint/43474/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400991","display_name":"Research Repository (Kingston University London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801636071","host_organization_name":"Kingston Hospital NHS Trust","host_organization_lineage":["https://openalex.org/I2801636071"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:epubs.surrey.ac.uk:840320","is_oa":false,"landing_page_url":"http://epubs.surrey.ac.uk/840320/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400680","display_name":"Surrey Research Insight Open Access (The University of Surrey)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28290843","host_organization_name":"University of Surrey","host_organization_lineage":["https://openalex.org/I28290843"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.46000000834465027}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1557595030","https://openalex.org/W1649743501","https://openalex.org/W1973283204","https://openalex.org/W1993767722","https://openalex.org/W2014787937","https://openalex.org/W2071625890","https://openalex.org/W2088725063","https://openalex.org/W2101295974","https://openalex.org/W2124560604","https://openalex.org/W2126885789","https://openalex.org/W2129821199","https://openalex.org/W2145325704","https://openalex.org/W2149900846","https://openalex.org/W2161435744","https://openalex.org/W2169346523","https://openalex.org/W6633511732","https://openalex.org/W6637081511","https://openalex.org/W6679546383"],"related_works":["https://openalex.org/W1497101000","https://openalex.org/W2102101492","https://openalex.org/W2068959240","https://openalex.org/W2214805552","https://openalex.org/W2054021310","https://openalex.org/W2369119827","https://openalex.org/W2188506513","https://openalex.org/W2718007713","https://openalex.org/W2013820100","https://openalex.org/W2274330372"],"abstract_inverted_index":{"Sequential":[0],"Monte":[1],"Carlo":[2],"probability":[3],"hypothesis":[4],"density":[5],"(SMC-PHD)":[6],"filter":[7,33,127,163],"has":[8],"received":[9],"much":[10],"interest":[11],"in":[12,65,78,168],"the":[13,42,45,50,66,84,88,103,108,130,137,142,145,150,165],"field":[14],"of":[15,29,44,53,68,87,132,136,144,170],"nonlinear":[16],"non-Gaussian":[17],"visual":[18,89,124],"tracking":[19,166],"due":[20],"to":[21,24,40,75,94,98,101,123,140],"its":[22],"ability":[23],"handle":[25],"a":[26,62],"variable":[27,51],"number":[28,52],"speakers.":[30],"The":[31,58],"SMC-PHD":[32,125,161],"employs":[34],"surviving,":[35],"spawned":[36,111],"and":[37,47,106,110,173],"born":[38,59,104],"particles":[39,60,105],"model":[41],"state":[43],"speakers":[46,54],"jointly":[48],"estimates":[49],"with":[55,153],"their":[56],"states.":[57],"play":[61],"critical":[63],"role":[64],"detection":[67],"new":[69],"speakers,":[70],"which":[71],"makes":[72],"it":[73],"necessary":[74],"propagate":[76,102],"them":[77],"each":[79],"frame.":[80],"However,":[81],"this":[82],"increases":[83],"computational":[85,174],"cost":[86],"tracker.":[90],"Here,":[91],"we":[92,116],"propose":[93],"use":[95],"audio":[96,118,138],"data":[97,119],"determine":[99],"when":[100],"re-allocate":[107],"surviving":[109],"particles.":[112,146],"In":[113],"our":[114,158],"framework,":[115],"employ":[117],"as":[120],"an":[121],"aid":[122],"(V-SMC-PHD)":[126],"by":[128],"using":[129],"direction":[131],"arrival":[133],"(DOA)":[134],"angles":[135],"sources":[139],"reshape":[141],"distribution":[143],"Experimental":[147],"results":[148],"on":[149],"AV16:3":[151],"dataset":[152],"multi-speaker":[154],"sequences":[155],"show":[156],"that":[157],"proposed":[159],"audio-visual":[160],"(AV-SMC-PHD)":[162],"improves":[164],"performance":[167],"terms":[169],"estimation":[171],"accuracy":[172],"efficiency.":[175]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
