{"id":"https://openalex.org/W3119066640","doi":"https://doi.org/10.1109/taslp.2021.3049337","title":"On Improved Training of CNN for Acoustic Source Localisation","display_name":"On Improved Training of CNN for Acoustic Source Localisation","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3119066640","doi":"https://doi.org/10.1109/taslp.2021.3049337","mag":"3119066640"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2021.3049337","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3049337","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/d73216bd-7084-4057-bfa5-f62d65b3c32e","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072511175","display_name":"Elizabeth Vargas","orcid":"https://orcid.org/0000-0002-4094-3631"},"institutions":[{"id":"https://openalex.org/I32062511","display_name":"Heriot-Watt University","ror":"https://ror.org/04mghma93","country_code":"GB","type":"education","lineage":["https://openalex.org/I32062511"]},{"id":"https://openalex.org/I4210085930","display_name":"Heriot-Watt University Malaysia","ror":"https://ror.org/0059w0420","country_code":"MY","type":"education","lineage":["https://openalex.org/I4210085930"]}],"countries":["GB","MY"],"is_corresponding":true,"raw_author_name":"Elizabeth Vargas","raw_affiliation_strings":["Institute of Sensors, Signals, and Systems, Heriot-Watt University, Edinburgh, U.K"],"affiliations":[{"raw_affiliation_string":"Institute of Sensors, Signals, and Systems, Heriot-Watt University, Edinburgh, U.K","institution_ids":["https://openalex.org/I4210085930","https://openalex.org/I32062511"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016434897","display_name":"James R. Hopgood","orcid":"https://orcid.org/0000-0002-3029-2425"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"James R. Hopgood","raw_affiliation_strings":["Institute of Digital Communications, School of Engineering, University of Edinburgh, Edinburgh, U.K"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Communications, School of Engineering, University of Edinburgh, Edinburgh, U.K","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030504110","display_name":"Keith Brown","orcid":"https://orcid.org/0000-0001-6836-1572"},"institutions":[{"id":"https://openalex.org/I32062511","display_name":"Heriot-Watt University","ror":"https://ror.org/04mghma93","country_code":"GB","type":"education","lineage":["https://openalex.org/I32062511"]},{"id":"https://openalex.org/I4210085930","display_name":"Heriot-Watt University Malaysia","ror":"https://ror.org/0059w0420","country_code":"MY","type":"education","lineage":["https://openalex.org/I4210085930"]}],"countries":["GB","MY"],"is_corresponding":false,"raw_author_name":"Keith Brown","raw_affiliation_strings":["Institute of Sensors, Signals, and Systems, Heriot-Watt University, Edinburgh, U.K"],"affiliations":[{"raw_affiliation_string":"Institute of Sensors, Signals, and Systems, Heriot-Watt University, Edinburgh, U.K","institution_ids":["https://openalex.org/I4210085930","https://openalex.org/I32062511"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086009008","display_name":"Kartic Subr","orcid":"https://orcid.org/0000-0002-7302-4383"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kartic Subr","raw_affiliation_strings":["Institute of Perception, Action and Behaviour, University of Edinburgh, Edinburgh, U.K"],"affiliations":[{"raw_affiliation_string":"Institute of Perception, Action and Behaviour, University of Edinburgh, Edinburgh, U.K","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5072511175"],"corresponding_institution_ids":["https://openalex.org/I32062511","https://openalex.org/I4210085930"],"apc_list":null,"apc_paid":null,"fwci":2.4589,"has_fulltext":true,"cited_by_count":22,"citation_normalized_percentile":{"value":0.8946767,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"29","issue":null,"first_page":"720","last_page":"732"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.771201491355896},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6874871850013733},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.590115487575531},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5138998031616211},{"id":"https://openalex.org/keywords/direction-of-arrival","display_name":"Direction of arrival","score":0.505671501159668},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.4525843560695648},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.43711990118026733},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.42380499839782715},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41478079557418823},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41278165578842163},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08156046271324158}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.771201491355896},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6874871850013733},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.590115487575531},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5138998031616211},{"id":"https://openalex.org/C172051844","wikidata":"https://www.wikidata.org/wiki/Q5280438","display_name":"Direction of arrival","level":3,"score":0.505671501159668},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.4525843560695648},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.43711990118026733},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.42380499839782715},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41478079557418823},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41278165578842163},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08156046271324158},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C21822782","wikidata":"https://www.wikidata.org/wiki/Q131214","display_name":"Antenna (radio)","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/taslp.2021.3049337","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3049337","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/d73216bd-7084-4057-bfa5-f62d65b3c32e","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/d73216bd-7084-4057-bfa5-f62d65b3c32e","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Vargas, E, Hopgood, J, Brown, K & Subr, K 2021, 'On Improved Training of CNN for Acoustic Source Localisation', IEEE Transactions on Audio, Speech and Language Processing, vol. 29, pp. 720 - 732. https://doi.org/10.1109/TASLP.2021.3049337","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.ed.ac.uk:publications/d73216bd-7084-4057-bfa5-f62d65b3c32e","is_oa":true,"landing_page_url":"http://hdl.handle.net/20.500.11820/d73216bd-7084-4057-bfa5-f62d65b3c32e","pdf_url":"https://www.pure.ed.ac.uk/ws/files/187043849/T_ASLP_2020_RQ_ii.pdf","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/d73216bd-7084-4057-bfa5-f62d65b3c32e","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/d73216bd-7084-4057-bfa5-f62d65b3c32e","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Vargas, E, Hopgood, J, Brown, K & Subr, K 2021, 'On Improved Training of CNN for Acoustic Source Localisation', IEEE Transactions on Audio, Speech and Language Processing, vol. 29, pp. 720 - 732. https://doi.org/10.1109/TASLP.2021.3049337","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.49000000953674316}],"awards":[],"funders":[{"id":"https://openalex.org/F4320311518","display_name":"Heriot-Watt University","ror":"https://ror.org/04mghma93"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":65,"referenced_works":["https://openalex.org/W81726370","https://openalex.org/W1517164593","https://openalex.org/W1585269459","https://openalex.org/W1971920230","https://openalex.org/W1996304098","https://openalex.org/W2005402103","https://openalex.org/W2038484192","https://openalex.org/W2046317813","https://openalex.org/W2047760827","https://openalex.org/W2095425552","https://openalex.org/W2099471712","https://openalex.org/W2113638573","https://openalex.org/W2117678320","https://openalex.org/W2130721371","https://openalex.org/W2139159265","https://openalex.org/W2163605009","https://openalex.org/W2173520492","https://openalex.org/W2314859257","https://openalex.org/W2403149086","https://openalex.org/W2509065397","https://openalex.org/W2611943505","https://openalex.org/W2618530766","https://openalex.org/W2663904211","https://openalex.org/W2701869962","https://openalex.org/W2742947407","https://openalex.org/W2743050194","https://openalex.org/W2771922328","https://openalex.org/W2810850190","https://openalex.org/W2810934215","https://openalex.org/W2883024703","https://openalex.org/W2883288608","https://openalex.org/W2884001105","https://openalex.org/W2885219692","https://openalex.org/W2888999318","https://openalex.org/W2889426390","https://openalex.org/W2890996850","https://openalex.org/W2894295011","https://openalex.org/W2897361856","https://openalex.org/W2897977894","https://openalex.org/W2901948927","https://openalex.org/W2910760432","https://openalex.org/W2913983001","https://openalex.org/W2914312680","https://openalex.org/W2952350176","https://openalex.org/W2962708126","https://openalex.org/W2962845248","https://openalex.org/W2962879692","https://openalex.org/W2963684088","https://openalex.org/W2964121744","https://openalex.org/W2964342924","https://openalex.org/W3015449694","https://openalex.org/W3015885816","https://openalex.org/W3098357269","https://openalex.org/W3127686677","https://openalex.org/W4295521014","https://openalex.org/W4297817572","https://openalex.org/W4320013936","https://openalex.org/W6631190155","https://openalex.org/W6685352114","https://openalex.org/W6735913928","https://openalex.org/W6746576432","https://openalex.org/W6755257315","https://openalex.org/W6758210894","https://openalex.org/W6789826613","https://openalex.org/W6910574228"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2380075625","https://openalex.org/W2233261550","https://openalex.org/W4295532600","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2063823869","https://openalex.org/W2997094352","https://openalex.org/W4293226380","https://openalex.org/W3216976533"],"abstract_inverted_index":{"Convolutional":[0],"Neural":[1],"Networks":[2],"(CNNs)":[3],"are":[4,137,155,177],"a":[5,30,84,91,142,160,179,186],"popular":[6],"choice":[7],"for":[8,90,140,212],"estimating":[9,16,213],"Direction":[10],"of":[11,93,108,198,209],"Arrival":[12],"(DoA)":[13],"without":[14],"explicitly":[15],"delays":[17],"between":[18],"multiple":[19],"microphones.":[20],"The":[21],"CNN":[22,41,161,180],"method":[23],"first":[24],"optimises":[25],"unknown":[26],"filter":[27],"weights":[28],"(of":[29],"CNN)":[31],"by":[32,170],"using":[33,56,62,215],"observations":[34],"and":[35,60,100,111,134,153,157],"ground-truth":[36],"directional":[37],"information.":[38],"This":[39,123,183],"trained":[40],"is":[42,125],"then":[43],"used":[44,201],"to":[45,104,116],"predict":[46],"incident":[47],"directions":[48],"given":[49],"test":[50,61,152,175],"observations.":[51],"Most":[52],"existing":[53],"methods":[54,169],"train":[55],"spectrally-flat":[57],"random":[58,121],"signals":[59,82,136,200],"speech.":[63],"In":[64],"this":[65],"paper,":[66],"which":[67,131],"focuses":[68],"on":[69],"single":[70],"source":[71],"DoA":[72,88,214],"estimation,":[73],"we":[74],"find":[75],"that":[76],"training":[77,117,154,159],"with":[78,118,162],"speech":[79,133,163],"or":[80],"music":[81,135],"produces":[83],"relative":[85],"improvement":[86,107,124],"in":[87,128,130,192],"accuracy":[89],"variety":[92],"audio":[94],"classes":[95],"across":[96],"16":[97],"acoustic":[98,149],"conditions":[99,176],"9":[101],"DoAs,":[102],"amounting":[103],"an":[105],"average":[106],"around":[109],"17%":[110],"19%":[112],"respectively":[113],"when":[114],"compared":[115],"spectrally":[119],"flat":[120],"signals.":[122],"also":[126],"observed":[127],"scenarios":[129],"the":[132,148,174,193,196,199,207],"synthesised":[138],"using,":[139],"example,":[141],"Generative":[143],"Adversarial":[144],"Network":[145],"(GAN).":[146],"When":[147,173],"environments":[150],"during":[151,202],"similar":[156],"reverberant,":[158],"outperforms":[164],"Generalized":[165],"Cross":[166],"Correlation":[167],"(GCC)":[168],"about":[171],"125%.":[172],"different,":[178],"performs":[181],"comparably.":[182],"paper":[184],"takes":[185],"step":[187],"towards":[188],"answering":[189],"open":[190],"questions":[191],"literature":[194],"regarding":[195],"nature":[197],"training,":[203],"as":[204,206],"well":[205],"amount":[208],"data":[210],"required":[211],"CNNs.":[216]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":5}],"updated_date":"2026-03-08T08:50:53.379069","created_date":"2025-10-10T00:00:00"}
