{"id":"https://openalex.org/W3135222947","doi":"https://doi.org/10.1109/tmm.2021.3061800","title":"Audio-Visual Tracking of Concurrent Speakers","display_name":"Audio-Visual Tracking of Concurrent Speakers","publication_year":2021,"publication_date":"2021-02-24","ids":{"openalex":"https://openalex.org/W3135222947","doi":"https://doi.org/10.1109/tmm.2021.3061800","mag":"3135222947"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2021.3061800","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2021.3061800","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://qmro.qmul.ac.uk/xmlui/bitstream/123456789/71342/2/Cavallaro%20Audio-visual%20tracking%20of%202021%20Accepted.PDF","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056495776","display_name":"Xinyuan Qian","orcid":"https://orcid.org/0000-0002-9511-6713"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Xinyuan Qian","raw_affiliation_strings":["Centre of Intelligent Sensing (CIS), Queen Mary University of London (QMUL), London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Centre of Intelligent Sensing (CIS), Queen Mary University of London (QMUL), London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066363315","display_name":"Alessio Brutti","orcid":"https://orcid.org/0000-0003-4146-3071"},"institutions":[{"id":"https://openalex.org/I2277624104","display_name":"Fondazione Bruno Kessler","ror":"https://ror.org/01j33xk10","country_code":"IT","type":"facility","lineage":["https://openalex.org/I2277624104"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Alessio Brutti","raw_affiliation_strings":["Fondazione Bruno Kessler (FBK), Trento, Italy"],"affiliations":[{"raw_affiliation_string":"Fondazione Bruno Kessler (FBK), Trento, Italy","institution_ids":["https://openalex.org/I2277624104"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057476577","display_name":"Oswald Lanz","orcid":"https://orcid.org/0000-0003-4793-4276"},"institutions":[{"id":"https://openalex.org/I2277624104","display_name":"Fondazione Bruno Kessler","ror":"https://ror.org/01j33xk10","country_code":"IT","type":"facility","lineage":["https://openalex.org/I2277624104"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Oswald Lanz","raw_affiliation_strings":["Fondazione Bruno Kessler (FBK), Trento, Italy"],"affiliations":[{"raw_affiliation_string":"Fondazione Bruno Kessler (FBK), Trento, Italy","institution_ids":["https://openalex.org/I2277624104"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025736898","display_name":"Maurizio Omologo","orcid":"https://orcid.org/0000-0003-0879-0548"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maurizio Omologo","raw_affiliation_strings":["FBK, Italy"],"affiliations":[{"raw_affiliation_string":"FBK, Italy","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004087827","display_name":"Andrea Cavallaro","orcid":"https://orcid.org/0000-0001-5086-7858"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Andrea Cavallaro","raw_affiliation_strings":["Centre of Intelligent Sensing (CIS), Queen Mary University of London (QMUL), London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Centre of Intelligent Sensing (CIS), Queen Mary University of London (QMUL), London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5056495776"],"corresponding_institution_ids":["https://openalex.org/I166337079"],"apc_list":null,"apc_paid":null,"fwci":3.5391,"has_fulltext":true,"cited_by_count":32,"citation_normalized_percentile":{"value":0.93516779,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":99},"biblio":{"volume":"24","issue":null,"first_page":"942","last_page":"954"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8895457983016968},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8268216252326965},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6964938044548035},{"id":"https://openalex.org/keywords/bittorrent-tracker","display_name":"BitTorrent tracker","score":0.6179224848747253},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5692075490951538},{"id":"https://openalex.org/keywords/eye-tracking","display_name":"Eye tracking","score":0.5099700689315796},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.4556792378425598},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4450911581516266},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.44138094782829285},{"id":"https://openalex.org/keywords/particle-filter","display_name":"Particle filter","score":0.41441625356674194},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.40262264013290405},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.37291815876960754},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3279991149902344},{"id":"https://openalex.org/keywords/kalman-filter","display_name":"Kalman filter","score":0.13546621799468994}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8895457983016968},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8268216252326965},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6964938044548035},{"id":"https://openalex.org/C57501372","wikidata":"https://www.wikidata.org/wiki/Q2021268","display_name":"BitTorrent tracker","level":3,"score":0.6179224848747253},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5692075490951538},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.5099700689315796},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.4556792378425598},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4450911581516266},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.44138094782829285},{"id":"https://openalex.org/C52421305","wikidata":"https://www.wikidata.org/wiki/Q1151499","display_name":"Particle filter","level":3,"score":0.41441625356674194},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.40262264013290405},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.37291815876960754},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3279991149902344},{"id":"https://openalex.org/C157286648","wikidata":"https://www.wikidata.org/wiki/Q846780","display_name":"Kalman filter","level":2,"score":0.13546621799468994},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/tmm.2021.3061800","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2021.3061800","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},{"id":"pmh:oai:qmro.qmul.ac.uk:123456789/71342","is_oa":true,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/71342","pdf_url":"https://qmro.qmul.ac.uk/xmlui/bitstream/123456789/71342/2/Cavallaro%20Audio-visual%20tracking%20of%202021%20Accepted.PDF","source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:alma.39UBZ_INST:11285008920001241","is_oa":true,"landing_page_url":"https://ieeexplore.ieee.org/document/9362311","pdf_url":null,"source":{"id":"https://openalex.org/S4210197018","display_name":"View","issn_l":"2688-268X","issn":["2688-268X","2688-3988"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:unibz.it:11285008920001241","is_oa":true,"landing_page_url":"https://bia.unibz.it/esploro/outputs/journalArticle/Audio-Visual-Tracking-of-Concurrent-Speakers/991006432798701241","pdf_url":null,"source":{"id":"https://openalex.org/S4210197018","display_name":"View","issn_l":"2688-268X","issn":["2688-268X","2688-3988"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:qmro.qmul.ac.uk:123456789/71342","is_oa":true,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/71342","pdf_url":"https://qmro.qmul.ac.uk/xmlui/bitstream/123456789/71342/2/Cavallaro%20Audio-visual%20tracking%20of%202021%20Accepted.PDF","source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},"sustainable_development_goals":[{"score":0.7699999809265137,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3135222947.pdf","grobid_xml":"https://content.openalex.org/works/W3135222947.grobid-xml"},"referenced_works_count":63,"referenced_works":["https://openalex.org/W1553557502","https://openalex.org/W1583773829","https://openalex.org/W1908905119","https://openalex.org/W1969299255","https://openalex.org/W1974793196","https://openalex.org/W1988904709","https://openalex.org/W1993815147","https://openalex.org/W1997121481","https://openalex.org/W2016157229","https://openalex.org/W2019972236","https://openalex.org/W2023100145","https://openalex.org/W2033819227","https://openalex.org/W2038943746","https://openalex.org/W2046317813","https://openalex.org/W2055098062","https://openalex.org/W2088725063","https://openalex.org/W2091377065","https://openalex.org/W2096301647","https://openalex.org/W2100152238","https://openalex.org/W2101295974","https://openalex.org/W2102605133","https://openalex.org/W2107457079","https://openalex.org/W2117449667","https://openalex.org/W2126885789","https://openalex.org/W2127000566","https://openalex.org/W2128571393","https://openalex.org/W2129821199","https://openalex.org/W2140345984","https://openalex.org/W2141461755","https://openalex.org/W2142612029","https://openalex.org/W2144248397","https://openalex.org/W2159128898","https://openalex.org/W2159259269","https://openalex.org/W2160557558","https://openalex.org/W2161160446","https://openalex.org/W2167206042","https://openalex.org/W2167582467","https://openalex.org/W2171306417","https://openalex.org/W2316138215","https://openalex.org/W2484797342","https://openalex.org/W2494424170","https://openalex.org/W2497516627","https://openalex.org/W2517955251","https://openalex.org/W2543696449","https://openalex.org/W2545563415","https://openalex.org/W2579152745","https://openalex.org/W2697319950","https://openalex.org/W2752604219","https://openalex.org/W2769321409","https://openalex.org/W2871284238","https://openalex.org/W2893728996","https://openalex.org/W2914461061","https://openalex.org/W2918984654","https://openalex.org/W2963460857","https://openalex.org/W2963809933","https://openalex.org/W2981873231","https://openalex.org/W2989954484","https://openalex.org/W3114753236","https://openalex.org/W4253928870","https://openalex.org/W6620707391","https://openalex.org/W6677907805","https://openalex.org/W6684845544","https://openalex.org/W6878916410"],"related_works":["https://openalex.org/W2062103941","https://openalex.org/W4384788979","https://openalex.org/W2511178891","https://openalex.org/W2126676984","https://openalex.org/W178060743","https://openalex.org/W2909390414","https://openalex.org/W2954509079","https://openalex.org/W3104472694","https://openalex.org/W2145247614","https://openalex.org/W2141888607"],"abstract_inverted_index":{"Audio-visual":[0],"tracking":[1],"of":[2,6,94],"an":[3],"unknown":[4],"number":[5],"concurrent":[7,52],"speakers":[8,53],"in":[9,44,123],"3D":[10,64,68,124],"is":[11],"a":[12,23,32,45,55],"challenging":[13],"task,":[14],"especially":[15],"when":[16],"sound":[17],"and":[18,38,97,106,118,125],"video":[19,65],"are":[20,71],"collected":[21],"with":[22,54],"compact":[24],"sensing":[25],"platform.":[26],"In":[27],"this":[28],"paper,":[29],"we":[30],"propose":[31],"tracker":[33,113],"that":[34,110],"builds":[35],"on":[36,91,104,126],"generative":[37,88],"discriminative":[39,78],"audio-visual":[40],"likelihood":[41,79],"models":[42],"formulated":[43],"particle":[46],"filtering":[47],"framework.":[48],"We":[49],"localize":[50],"multiple":[51],"de-emphasized":[56,99],"acoustic":[57,100],"map":[58,101],"assisted":[59],"by":[60],"the":[61,95,98,111,115,119,127],"image":[62,128],"detection-derived":[63],"observations.":[66],"The":[67,87],"multi-modal":[69],"observations":[70],"either":[72],"assigned":[73],"to":[74,83],"existing":[75],"tracks":[76],"for":[77],"computation":[80],"or":[81],"used":[82],"initialize":[84],"new":[85],"tracks.":[86],"likelihoods":[89],"rely":[90],"color":[92],"distribution":[93],"target":[96],"value.":[102],"Experiments":[103],"AV16.3":[105],"CAV3D":[107],"datasets":[108],"show":[109],"proposed":[112],"outperforms":[114],"uni-modal":[116],"trackers":[117],"state-of-the-art":[120],"approaches":[121],"both":[122],"plane.":[129]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":2}],"updated_date":"2026-03-28T08:17:26.163206","created_date":"2025-10-10T00:00:00"}
