{"id":"https://openalex.org/W3161772428","doi":"https://doi.org/10.1109/icpr48806.2021.9412682","title":"3D Audio-Visual Speaker Tracking with A Novel Particle Filter","display_name":"3D Audio-Visual Speaker Tracking with A Novel Particle Filter","publication_year":2021,"publication_date":"2021-01-10","ids":{"openalex":"https://openalex.org/W3161772428","doi":"https://doi.org/10.1109/icpr48806.2021.9412682","mag":"3161772428"},"language":"en","primary_location":{"id":"doi:10.1109/icpr48806.2021.9412682","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr48806.2021.9412682","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 25th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100410326","display_name":"Hong Liu","orcid":"https://orcid.org/0000-0002-7498-6541"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210128628","display_name":"Peking University Shenzhen Hospital","ror":"https://ror.org/03kkjyb15","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210128628"]},{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hong Liu","raw_affiliation_strings":["Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School,Shenzhen,China","Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School,Shenzhen,China","institution_ids":["https://openalex.org/I20231570","https://openalex.org/I180726961","https://openalex.org/I4210128628"]},{"raw_affiliation_string":"Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084017208","display_name":"Yongheng Sun","orcid":"https://orcid.org/0000-0002-0103-0215"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210128628","display_name":"Peking University Shenzhen Hospital","ror":"https://ror.org/03kkjyb15","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210128628"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongheng Sun","raw_affiliation_strings":["Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School,Shenzhen,China","Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School,Shenzhen,China","institution_ids":["https://openalex.org/I20231570","https://openalex.org/I180726961","https://openalex.org/I4210128628"]},{"raw_affiliation_string":"Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071411382","display_name":"Yidi Li","orcid":"https://orcid.org/0000-0002-5236-7010"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210128628","display_name":"Peking University Shenzhen Hospital","ror":"https://ror.org/03kkjyb15","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210128628"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yidi Li","raw_affiliation_strings":["Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School,Shenzhen,China","Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School,Shenzhen,China","institution_ids":["https://openalex.org/I20231570","https://openalex.org/I180726961","https://openalex.org/I4210128628"]},{"raw_affiliation_string":"Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086513946","display_name":"Bing Yang","orcid":"https://orcid.org/0000-0002-8978-2322"},"institutions":[{"id":"https://openalex.org/I4210128628","display_name":"Peking University Shenzhen Hospital","ror":"https://ror.org/03kkjyb15","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210128628"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bing Yang","raw_affiliation_strings":["Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School,Shenzhen,China","Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School,Shenzhen,China","institution_ids":["https://openalex.org/I20231570","https://openalex.org/I180726961","https://openalex.org/I4210128628"]},{"raw_affiliation_string":"Key Laboratory of Machine Perception Peking University, Shenzhen Graduate School, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100410326"],"corresponding_institution_ids":["https://openalex.org/I180726961","https://openalex.org/I20231570","https://openalex.org/I4210128628"],"apc_list":null,"apc_paid":null,"fwci":0.7618,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.69986477,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"7343","last_page":"7348"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/particle-filter","display_name":"Particle filter","score":0.8361155390739441},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7332394123077393},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6765381097793579},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6633156538009644},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.6542191505432129},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.5211448669433594},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.48517757654190063},{"id":"https://openalex.org/keywords/eye-tracking","display_name":"Eye tracking","score":0.4641004800796509},{"id":"https://openalex.org/keywords/position","display_name":"Position (finance)","score":0.45927363634109497},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.4426884949207306},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.435441255569458},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4151086211204529},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.39387398958206177},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.3660214841365814},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.0892970860004425}],"concepts":[{"id":"https://openalex.org/C52421305","wikidata":"https://www.wikidata.org/wiki/Q1151499","display_name":"Particle filter","level":3,"score":0.8361155390739441},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7332394123077393},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6765381097793579},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6633156538009644},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.6542191505432129},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.5211448669433594},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.48517757654190063},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.4641004800796509},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.45927363634109497},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.4426884949207306},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.435441255569458},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4151086211204529},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.39387398958206177},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3660214841365814},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0892970860004425},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr48806.2021.9412682","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr48806.2021.9412682","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 25th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4399999976158142,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G3357396989","display_name":null,"funder_award_id":"JCYJ20190808182209321","funder_id":"https://openalex.org/F4320329801","funder_display_name":"Shenzhen Research and Development Program"},{"id":"https://openalex.org/G401644923","display_name":null,"funder_award_id":"61673030,U1613209","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329801","display_name":"Shenzhen Research and Development Program","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1497738639","https://openalex.org/W1518556865","https://openalex.org/W1553557502","https://openalex.org/W1969299255","https://openalex.org/W1974793196","https://openalex.org/W1985430680","https://openalex.org/W2011594914","https://openalex.org/W2011635389","https://openalex.org/W2023100145","https://openalex.org/W2053101950","https://openalex.org/W2088725063","https://openalex.org/W2091377065","https://openalex.org/W2093010905","https://openalex.org/W2100152238","https://openalex.org/W2129821199","https://openalex.org/W2129866629","https://openalex.org/W2160337655","https://openalex.org/W2166916857","https://openalex.org/W2167206042","https://openalex.org/W2285716245","https://openalex.org/W2615100522","https://openalex.org/W2697319950","https://openalex.org/W2884554680","https://openalex.org/W2889091037","https://openalex.org/W2918984654","https://openalex.org/W2970201661","https://openalex.org/W6629785575","https://openalex.org/W6633285549","https://openalex.org/W6679546383","https://openalex.org/W6739944753","https://openalex.org/W6752997945"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W4245955731","https://openalex.org/W2393726419"],"abstract_inverted_index":{"3D":[0,29,57,71,179,200],"speaker":[1,59,67,112,151,175,195],"tracking":[2,30,68,196],"using":[3],"co-located":[4],"audio-visual":[5,58,66,72,84,162],"sensors":[6,37],"has":[7,74],"received":[8],"much":[9],"attention":[10],"recently.":[11],"Though":[12],"various":[13],"methods":[14,190],"have":[15],"been":[16],"attempted":[17],"to":[18,25,40,89,140],"this":[19,45,166],"field,":[20],"it":[21],"is":[22,54,121,137,159],"still":[23],"challenging":[24],"obtain":[26],"a":[27,41,47,193],"reliable":[28],"result":[31],"since":[32],"the":[33,79,92,95,98,106,111,115,118,126,134,141,146,156,168,174,185,204],"position":[34],"of":[35,94,150],"colocated":[36],"are":[38],"restricted":[39],"small":[42],"area.":[43],"In":[44,78,114,165],"paper,":[46],"novel":[48],"particle":[49,99,119,163,169],"filter":[50],"(PF)":[51],"based":[52,65],"method":[53],"proposed":[55,186],"for":[56],"tracking.":[60],"Compared":[61],"with":[62],"traditional":[63],"PF":[64],"method,":[69],"our":[70],"tracker":[73,187],"two":[75],"main":[76],"characteristics.":[77],"prediction":[80],"stage,":[81,117],"we":[82],"use":[83],"information":[85],"at":[86],"current":[87],"frame":[88],"further":[90],"adjust":[91],"direction":[93,131,157],"particles":[96,107],"after":[97],"state":[100],"transition":[101],"process,":[102],"which":[103],"can":[104,171],"make":[105],"more":[108],"concentrated":[109],"around":[110],"direction.":[113],"update":[116],"likelihood":[120,136,158,170],"calculated":[122],"by":[123,161],"fusing":[124],"both":[125,198],"visual":[127],"distance":[128,135],"and":[129,145,155,191,202],"audiovisual":[130],"information.":[132],"Specially,":[133],"obtained":[138],"according":[139],"camera":[142],"projection":[143],"model":[144],"adaptively":[147],"estimated":[148],"size":[149],"face":[152],"or":[153],"head,":[154],"determined":[160],"fitness.":[164],"way,":[167],"better":[172],"represent":[173],"presence":[176],"probability":[177],"in":[178,199],"space.":[180],"Experimental":[181],"results":[182],"show":[183],"that":[184],"outperforms":[188],"other":[189],"provides":[192],"favorable":[194],"performance":[197],"space":[201],"on":[203],"image":[205],"plane.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
