{"id":"https://openalex.org/W2561826558","doi":"https://doi.org/10.1109/taslp.2017.2690563","title":"Unsupervised Feature Learning Based on Deep Models for Environmental Audio Tagging","display_name":"Unsupervised Feature Learning Based on Deep Models for Environmental Audio Tagging","publication_year":2017,"publication_date":"2017-05-23","ids":{"openalex":"https://openalex.org/W2561826558","doi":"https://doi.org/10.1109/taslp.2017.2690563","mag":"2561826558"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2017.2690563","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2017.2690563","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1109/taslp.2017.2690563","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101467338","display_name":"Yong Xu","orcid":"https://orcid.org/0000-0003-4944-6890"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yong Xu","raw_affiliation_strings":["Centre for Vision, University of Surrey, Guildford, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, University of Surrey, Guildford, U.K","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013817745","display_name":"Qiang Huang","orcid":"https://orcid.org/0000-0003-1120-4685"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiang Huang","raw_affiliation_strings":["Centre for Vision, University of Surrey, Guildford, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, University of Surrey, Guildford, U.K","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100676721","display_name":"Wenwu Wang","orcid":"https://orcid.org/0000-0002-8393-5703"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenwu Wang","raw_affiliation_strings":["Centre for Vision, University of Surrey, Guildford, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, University of Surrey, Guildford, U.K","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009532141","display_name":"Peter Foster","orcid":"https://orcid.org/0000-0003-1818-5886"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Peter Foster","raw_affiliation_strings":["School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K"],"affiliations":[{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113798270","display_name":"Siddharth Sigtia","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Siddharth Sigtia","raw_affiliation_strings":["School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K"],"affiliations":[{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022165330","display_name":"Philip J. B. Jackson","orcid":"https://orcid.org/0000-0001-7933-5935"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Philip J. B. Jackson","raw_affiliation_strings":["Centre for Vision, University of Surrey, Guildford, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, University of Surrey, Guildford, U.K","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066967599","display_name":"Mark D. Plumbley","orcid":"https://orcid.org/0000-0002-9708-1075"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mark D. Plumbley","raw_affiliation_strings":["Centre for Vision, University of Surrey, Guildford, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, University of Surrey, Guildford, U.K","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101467338"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":9.7005,"has_fulltext":false,"cited_by_count":87,"citation_normalized_percentile":{"value":0.98644691,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"25","issue":"6","first_page":"1230","last_page":"1241"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.800524115562439},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5943664312362671},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5498780608177185},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5452594757080078},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5088215470314026},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.4983391761779785},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.49646979570388794},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.490906298160553},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.47213083505630493},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.44771096110343933},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4344160854816437},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.41714048385620117},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4156475365161896},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.41548678278923035}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.800524115562439},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5943664312362671},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5498780608177185},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5452594757080078},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5088215470314026},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.4983391761779785},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.49646979570388794},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.490906298160553},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.47213083505630493},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.44771096110343933},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4344160854816437},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.41714048385620117},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4156475365161896},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.41548678278923035},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/taslp.2017.2690563","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2017.2690563","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:alma.44SUR_INST:11138530800002346","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4210197018","display_name":"View","issn_l":"2688-268X","issn":["2688-268X","2688-3988"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:arXiv.org:1607.03681","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1607.03681","pdf_url":"https://arxiv.org/pdf/1607.03681","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:epubs.surrey.ac.uk:813726","is_oa":false,"landing_page_url":"http://epubs.surrey.ac.uk/813726/18/XuHuangWangFSJP17-CC-BY.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400680","display_name":"Surrey Research Insight Open Access (The University of Surrey)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28290843","host_organization_name":"University of Surrey","host_organization_lineage":["https://openalex.org/I28290843"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":{"id":"doi:10.1109/taslp.2017.2690563","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2017.2690563","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","score":0.6800000071525574,"display_name":"Life in Land"}],"awards":[{"id":"https://openalex.org/G5939146501","display_name":"Making Sense of Sounds","funder_award_id":"EP/N014111/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G7370593653","display_name":null,"funder_award_id":"EP/M507088/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8995825145","display_name":null,"funder_award_id":"EP/N014111/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W60493759","https://openalex.org/W94094186","https://openalex.org/W1503398984","https://openalex.org/W1508121828","https://openalex.org/W1560616740","https://openalex.org/W1650531274","https://openalex.org/W1689977300","https://openalex.org/W1904365287","https://openalex.org/W1985180072","https://openalex.org/W1993882792","https://openalex.org/W2012897754","https://openalex.org/W2025768430","https://openalex.org/W2039739794","https://openalex.org/W2044893557","https://openalex.org/W2059652044","https://openalex.org/W2062164080","https://openalex.org/W2062227835","https://openalex.org/W2065554891","https://openalex.org/W2069943693","https://openalex.org/W2077456266","https://openalex.org/W2081884280","https://openalex.org/W2100495367","https://openalex.org/W2102381657","https://openalex.org/W2108745803","https://openalex.org/W2118858186","https://openalex.org/W2126109423","https://openalex.org/W2135342008","https://openalex.org/W2136450786","https://openalex.org/W2145027719","https://openalex.org/W2148577181","https://openalex.org/W2160815625","https://openalex.org/W2163922914","https://openalex.org/W2166452403","https://openalex.org/W2168249484","https://openalex.org/W2176625348","https://openalex.org/W2212106655","https://openalex.org/W2250384498","https://openalex.org/W2295991281","https://openalex.org/W2354870669","https://openalex.org/W2399607803","https://openalex.org/W2403766732","https://openalex.org/W2414894569","https://openalex.org/W2524365899","https://openalex.org/W2963369619","https://openalex.org/W3103314642","https://openalex.org/W3103455452","https://openalex.org/W3121574643","https://openalex.org/W6630222583","https://openalex.org/W6640036494","https://openalex.org/W6677919164","https://openalex.org/W6682050847","https://openalex.org/W6684234533","https://openalex.org/W6696901769","https://openalex.org/W6712735165","https://openalex.org/W6713298849","https://openalex.org/W6715395060"],"related_works":["https://openalex.org/W2983142544","https://openalex.org/W2891059443","https://openalex.org/W3037110488","https://openalex.org/W4281663961","https://openalex.org/W3208888551","https://openalex.org/W4313561566","https://openalex.org/W3208386644","https://openalex.org/W4220682630","https://openalex.org/W4389832810","https://openalex.org/W3181622257"],"abstract_inverted_index":{"Environmental":[0],"audio":[1,27,182,248],"tagging":[2,28,183,249],"aims":[3],"to":[4,26,40,53,77,108,123,135,198],"predict":[5],"only":[6,88],"the":[7,16,55,60,70,75,83,110,114,117,141,164,167,172,179,201,217,222,226,234,241,245,254],"presence":[8],"or":[9,127,133],"absence":[10],"of":[11,66,69,113,166,178,244,253,257],"certain":[12],"acoustic":[13,18,33,61],"events":[14],"in":[15,29],"interested":[17],"scene.":[19],"In":[20],"this":[21,258],"paper,":[22],"we":[23,121],"make":[24],"contributions":[25],"two":[30],"parts,":[31],"respectively,":[32],"modeling":[34],"and":[35,100,155],"feature":[36,51,119],"learning.":[37],"We":[38],"propose":[39,122],"use":[41,124],"a":[42,63,79,125,189,210],"shrinking":[43],"deep":[44,129],"neural":[45],"network":[46],"(DNN)":[47],"framework":[48],"incorporating":[49],"unsupervised":[50,118],"learning":[52],"handle":[54],"multilabel":[56,80],"classification":[57,81],"task.":[58],"For":[59,116],"modeling,":[62],"large":[64],"set":[65,243],"contextual":[67,159],"frames":[68],"chunk":[71,89],"are":[72,97,105,150],"fed":[73],"into":[74],"DNN":[76,168,219],"perform":[78],"for":[82],"expected":[84],"tags,":[85],"considering":[86],"that":[87,230],"(or":[90],"utterance)":[91],"level":[92],"rather":[93],"than":[94],"frame-level":[95],"labels":[96],"available.":[98],"Dropout":[99],"background":[101,153],"noise":[102,154],"aware":[103],"training":[104],"also":[106,228],"adopted":[107],"improve":[109,163],"generalization":[111],"capability":[112],"DNNs.":[115],"learning,":[120],"symmetric":[126],"asymmetric":[128],"denoising":[130],"auto-encoder":[131],"(syDAE":[132],"asyDAE)":[134],"generate":[136],"new":[137,147],"data-driven":[138],"features":[139],"from":[140,196],"logarithmic":[142],"Mel-filter":[143],"banks":[144],"features.":[145],"The":[146,204],"features,":[148],"which":[149],"smoothed":[151],"against":[152],"more":[156],"compact":[157],"with":[158,171,216,237],"information,":[160],"can":[161,208],"further":[162],"performance":[165,236],"baseline.":[169],"Compared":[170],"standard":[173],"Gaussian":[174],"mixture":[175],"model":[176],"baseline":[177,220],"DCASE":[180,246],"2016":[181,247],"challenge,":[184],"our":[185,231],"proposed":[186,205],"method":[187],"obtains":[188,233],"significant":[190],"equal":[191],"error":[192],"rate":[193],"(EER)":[194],"reduction":[195,214],"0.21":[197],"0.13":[199],"on":[200,221,240],"development":[202,223],"set.":[203,224],"asyDAE":[206],"system":[207],"get":[209],"relative":[211],"6.7%":[212],"EER":[213,239,252],"compared":[215],"strong":[218],"Finally,":[225],"results":[227],"show":[229],"approach":[232],"state-of-the-art":[235],"0.15":[238],"evaluation":[242],"task":[250],"while":[251],"first":[255],"prize":[256],"challenge":[259],"is":[260],"0.17.":[261]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":19},{"year":2018,"cited_by_count":16},{"year":2017,"cited_by_count":7}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
