{"id":"https://openalex.org/W4386763588","doi":"https://doi.org/10.1109/waspaa58266.2023.10248185","title":"Audio Inputs for Active Speaker Detection and Localization Via Microphone Array","display_name":"Audio Inputs for Active Speaker Detection and Localization Via Microphone Array","publication_year":2023,"publication_date":"2023-09-15","ids":{"openalex":"https://openalex.org/W4386763588","doi":"https://doi.org/10.1109/waspaa58266.2023.10248185"},"language":"en","primary_location":{"id":"doi:10.1109/waspaa58266.2023.10248185","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/waspaa58266.2023.10248185","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005679652","display_name":"Davide Berghi","orcid":"https://orcid.org/0000-0001-6279-6364"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Davide Berghi","raw_affiliation_strings":["University of Surrey,Centre for Vision, Speech and Signal Processing,Guildford,UK","Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, UK"],"affiliations":[{"raw_affiliation_string":"University of Surrey,Centre for Vision, Speech and Signal Processing,Guildford,UK","institution_ids":["https://openalex.org/I28290843"]},{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, UK","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022165330","display_name":"Philip J. B. Jackson","orcid":"https://orcid.org/0000-0001-7933-5935"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Philip J. B. Jackson","raw_affiliation_strings":["University of Surrey,Centre for Vision, Speech and Signal Processing,Guildford,UK","Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, UK"],"affiliations":[{"raw_affiliation_string":"University of Surrey,Centre for Vision, Speech and Signal Processing,Guildford,UK","institution_ids":["https://openalex.org/I28290843"]},{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, UK","institution_ids":["https://openalex.org/I28290843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5005679652"],"corresponding_institution_ids":["https://openalex.org/I28290843"],"apc_list":null,"apc_paid":null,"fwci":0.4066,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.574263,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.7523398399353027},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6924553513526917},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6625282764434814},{"id":"https://openalex.org/keywords/microphone-array","display_name":"Microphone array","score":0.5868602395057678},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.45890089869499207},{"id":"https://openalex.org/keywords/noise-canceling-microphone","display_name":"Noise-canceling microphone","score":0.45377805829048157},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.0963471531867981},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07034355401992798}],"concepts":[{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.7523398399353027},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6924553513526917},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6625282764434814},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.5868602395057678},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.45890089869499207},{"id":"https://openalex.org/C36922181","wikidata":"https://www.wikidata.org/wiki/Q7047650","display_name":"Noise-canceling microphone","level":5,"score":0.45377805829048157},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0963471531867981},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07034355401992798},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/waspaa58266.2023.10248185","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/waspaa58266.2023.10248185","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1478229083","display_name":null,"funder_award_id":"EP/V038087/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W2037227137","https://openalex.org/W2046317813","https://openalex.org/W2073902875","https://openalex.org/W2316138215","https://openalex.org/W2772736377","https://openalex.org/W2810934215","https://openalex.org/W2942551338","https://openalex.org/W2954458766","https://openalex.org/W2989980422","https://openalex.org/W3005741390","https://openalex.org/W3016098309","https://openalex.org/W3031666665","https://openalex.org/W3034702511","https://openalex.org/W3091667472","https://openalex.org/W3092237241","https://openalex.org/W3098454764","https://openalex.org/W3105684258","https://openalex.org/W3163193264","https://openalex.org/W3163881933","https://openalex.org/W3176232375","https://openalex.org/W3189964604","https://openalex.org/W3203177955","https://openalex.org/W3206008172","https://openalex.org/W3207207922","https://openalex.org/W3213455868","https://openalex.org/W3213716738","https://openalex.org/W4282031641","https://openalex.org/W4309598585","https://openalex.org/W4312356258","https://openalex.org/W6757716108","https://openalex.org/W6773738941","https://openalex.org/W6784117923","https://openalex.org/W6784119104","https://openalex.org/W6784824663","https://openalex.org/W6785208711","https://openalex.org/W6787890147","https://openalex.org/W6803901991","https://openalex.org/W6838853441"],"related_works":["https://openalex.org/W2049810559","https://openalex.org/W2068528578","https://openalex.org/W2015730342","https://openalex.org/W1515932869","https://openalex.org/W2769861442","https://openalex.org/W1654949927","https://openalex.org/W2085249053","https://openalex.org/W2037537113","https://openalex.org/W4240995952","https://openalex.org/W1975706483"],"abstract_inverted_index":{"This":[0],"study":[1],"considers":[2],"the":[3,38,46,50,62,78,85,115,125,131,142],"problem":[4],"of":[5,40,52,64,121],"detecting":[6],"and":[7,30,67,91,106,119],"locating":[8],"an":[9],"active":[10,27],"talker\u2019s":[11],"horizontal":[12],"position":[13],"from":[14,45,114],"multichannel":[15,47],"audio":[16,48],"captured":[17],"by":[18,111],"a":[19,53,92],"microphone":[20],"array.":[21,117],"We":[22],"refer":[23],"to":[24,36,61,76,99,128,137],"this":[25,71],"as":[26,49],"speaker":[28],"detection":[29],"localization":[31],"(ASDL).":[32],"Our":[33],"goal":[34],"was":[35],"investigate":[37,138],"performance":[39,129],"spatial":[41,86],"acoustic":[42],"features":[43],"extracted":[44],"input":[51],"convolutional":[54],"recurrent":[55],"neural":[56],"network":[57],"(CRNN),":[58],"in":[59,141],"relation":[60],"number":[63],"channels":[65],"employed":[66],"additive":[68],"noise.":[69],"To":[70],"end,":[72],"experiments":[73],"were":[74,109],"conducted":[75],"compare":[77],"generalized":[79],"cross-correlation":[80],"with":[81],"phase":[82],"transform":[83],"(GCC-PHAT),":[84],"cue-augmented":[87],"log-spectrogram":[88],"(SALSA)":[89],"features,":[90],"recently-proposed":[93],"beamforming":[94],"method,":[95],"evaluating":[96],"their":[97],"robustness":[98],"various":[100],"noise":[101],"intensities.":[102],"The":[103],"array":[104],"aperture":[105],"sampling":[107],"density":[108],"tested":[110],"taking":[112],"subsets":[113],"16-microphone":[116],"Results":[118],"tests":[120],"statistical":[122],"significance":[123],"demonstrate":[124],"microphones\u2019":[126],"contribution":[127],"on":[130],"TragicTalkers":[132],"dataset,":[133],"which":[134],"offers":[135],"opportunities":[136],"audio-visual":[139],"approaches":[140],"future.":[143]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
