{"id":"https://openalex.org/W2964891022","doi":"https://doi.org/10.1109/taslp.2019.2930913","title":"Weakly Labelled AudioSet Tagging With Attention Neural Networks","display_name":"Weakly Labelled AudioSet Tagging With Attention Neural Networks","publication_year":2019,"publication_date":"2019-07-26","ids":{"openalex":"https://openalex.org/W2964891022","doi":"https://doi.org/10.1109/taslp.2019.2930913","mag":"2964891022"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2019.2930913","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2019.2930913","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1903.00765","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072482416","display_name":"Qiuqiang Kong","orcid":"https://orcid.org/0000-0003-2864-0475"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Qiuqiang Kong","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021396403","display_name":"Changsong Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Changsong Yu","raw_affiliation_strings":["University of Stuttgart, 70174 Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"University of Stuttgart, 70174 Stuttgart, Germany","institution_ids":["https://openalex.org/I100066346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112629146","display_name":"Yong Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yong Xu","raw_affiliation_strings":["Tencent AI Lab, Bellevue, WA, USA"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Bellevue, WA, USA","institution_ids":["https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056441203","display_name":"Turab Iqbal","orcid":"https://orcid.org/0000-0003-3393-2544"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Turab Iqbal","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100676721","display_name":"Wenwu Wang","orcid":"https://orcid.org/0000-0002-8393-5703"},"institutions":[{"id":"https://openalex.org/I143413998","display_name":"Qingdao University of Science and Technology","ror":"https://ror.org/041j8js14","country_code":"CN","type":"education","lineage":["https://openalex.org/I143413998"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenwu Wang","raw_affiliation_strings":["Qingdao University of Science and Technology, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"Qingdao University of Science and Technology, Qingdao, China","institution_ids":["https://openalex.org/I143413998"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066967599","display_name":"Mark D. Plumbley","orcid":"https://orcid.org/0000-0002-9708-1075"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mark D. Plumbley","raw_affiliation_strings":["Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K"],"affiliations":[{"raw_affiliation_string":"Centre for Vision, Speech and Signal Processing, University of Surrey, Guildford, U.K","institution_ids":["https://openalex.org/I28290843"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5072482416"],"corresponding_institution_ids":["https://openalex.org/I28290843"],"apc_list":null,"apc_paid":null,"fwci":11.0198,"has_fulltext":false,"cited_by_count":81,"citation_normalized_percentile":{"value":0.98983567,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"27","issue":"11","first_page":"1791","last_page":"1802"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9833999872207642,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7468610405921936},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6746870875358582},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.5251861810684204},{"id":"https://openalex.org/keywords/offset","display_name":"Offset (computer science)","score":0.5234412550926208},{"id":"https://openalex.org/keywords/sound-quality","display_name":"Sound quality","score":0.49492353200912476},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4397471249103546},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.41950953006744385},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4175609350204468},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3894835114479065}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7468610405921936},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6746870875358582},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.5251861810684204},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.5234412550926208},{"id":"https://openalex.org/C167310288","wikidata":"https://www.wikidata.org/wiki/Q7564808","display_name":"Sound quality","level":2,"score":0.49492353200912476},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4397471249103546},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.41950953006744385},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4175609350204468},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3894835114479065},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/taslp.2019.2930913","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2019.2930913","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1903.00765","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1903.00765","pdf_url":"https://arxiv.org/pdf/1903.00765","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:epubs.surrey.ac.uk:852511","is_oa":false,"landing_page_url":"http://epubs.surrey.ac.uk/852511/1/KongYuXuIWP19-aslp-audioset_accepted_ieee.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400680","display_name":"Surrey Research Insight Open Access (The University of Surrey)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28290843","host_organization_name":"University of Surrey","host_organization_lineage":["https://openalex.org/I28290843"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1903.00765","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1903.00765","pdf_url":"https://arxiv.org/pdf/1903.00765","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.7799999713897705,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1093520412","display_name":null,"funder_award_id":"Scholarship","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G1426318481","display_name":null,"funder_award_id":"grant","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G2300736770","display_name":null,"funder_award_id":"(CSC)","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G3220109430","display_name":null,"funder_award_id":"201406150082","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G5939146501","display_name":"Making Sense of Sounds","funder_award_id":"EP/N014111/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G836970423","display_name":null,"funder_award_id":"201406","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G8589651859","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320322725","funder_display_name":"China Scholarship Council"},{"id":"https://openalex.org/G8995825145","display_name":null,"funder_award_id":"EP/N014111/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":91,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1535599202","https://openalex.org/W1554104313","https://openalex.org/W1590386482","https://openalex.org/W1665214252","https://openalex.org/W1799366690","https://openalex.org/W1902237438","https://openalex.org/W1929873255","https://openalex.org/W2001773889","https://openalex.org/W2008415856","https://openalex.org/W2010792435","https://openalex.org/W2032360374","https://openalex.org/W2038484192","https://openalex.org/W2048174296","https://openalex.org/W2076063813","https://openalex.org/W2078579128","https://openalex.org/W2086384421","https://openalex.org/W2092216166","https://openalex.org/W2095705004","https://openalex.org/W2107430826","https://openalex.org/W2108598243","https://openalex.org/W2108745803","https://openalex.org/W2110119381","https://openalex.org/W2133564696","https://openalex.org/W2154318594","https://openalex.org/W2163474322","https://openalex.org/W2176625348","https://openalex.org/W2339172597","https://openalex.org/W2354870669","https://openalex.org/W2408239454","https://openalex.org/W2408491073","https://openalex.org/W2414894569","https://openalex.org/W2487315868","https://openalex.org/W2509593407","https://openalex.org/W2526050071","https://openalex.org/W2529483679","https://openalex.org/W2531897166","https://openalex.org/W2566935005","https://openalex.org/W2593116425","https://openalex.org/W2597655663","https://openalex.org/W2604293017","https://openalex.org/W2775505379","https://openalex.org/W2775794021","https://openalex.org/W2785934082","https://openalex.org/W2791956393","https://openalex.org/W2807788374","https://openalex.org/W2883595988","https://openalex.org/W2883935097","https://openalex.org/W2884535146","https://openalex.org/W2919115771","https://openalex.org/W2949364537","https://openalex.org/W2950178297","https://openalex.org/W2963022469","https://openalex.org/W2963369619","https://openalex.org/W2963386218","https://openalex.org/W2963451564","https://openalex.org/W2963610932","https://openalex.org/W2963723765","https://openalex.org/W2963906190","https://openalex.org/W2963911037","https://openalex.org/W2964069537","https://openalex.org/W2964121744","https://openalex.org/W2964308564","https://openalex.org/W2997685131","https://openalex.org/W3100031071","https://openalex.org/W4295723153","https://openalex.org/W6630875275","https://openalex.org/W6631190155","https://openalex.org/W6633146083","https://openalex.org/W6635120088","https://openalex.org/W6637242042","https://openalex.org/W6638444622","https://openalex.org/W6640500741","https://openalex.org/W6669992580","https://openalex.org/W6676245398","https://openalex.org/W6679434410","https://openalex.org/W6683033130","https://openalex.org/W6684158799","https://openalex.org/W6703720303","https://openalex.org/W6714426239","https://openalex.org/W6715395060","https://openalex.org/W6725362272","https://openalex.org/W6735377749","https://openalex.org/W6736350746","https://openalex.org/W6747331233","https://openalex.org/W6747701563","https://openalex.org/W6749158954","https://openalex.org/W6752516136","https://openalex.org/W6753583920","https://openalex.org/W6756219837"],"related_works":["https://openalex.org/W2329500892","https://openalex.org/W28991112","https://openalex.org/W2370726991","https://openalex.org/W2369710579","https://openalex.org/W4327728159","https://openalex.org/W4394266730","https://openalex.org/W1990856605","https://openalex.org/W2053783616","https://openalex.org/W4388913932","https://openalex.org/W4309130263"],"abstract_inverted_index":{"Audio":[0],"tagging":[1,22,40,90,187],"is":[2,44,58,71],"the":[3,7,64,77,86,102,112,152,166,185,197,203],"task":[4],"of":[5,11,34,48,68,106,163,170,178,199,205,207],"predicting":[6],"presence":[8,65],"or":[9,66],"absence":[10,67],"sound":[12,35,69,209],"classes":[13,70],"within":[14],"an":[15,107],"audio":[16,21,39,52,89,108,133,186],"clip.":[17,109],"Previous":[18],"work":[19],"in":[20,61],"focused":[23],"on":[24,41,148,189],"relatively":[25],"small":[26,32],"datasets":[27],"limited":[28],"to":[29,100],"recognizing":[30],"a":[31,45,98,158,193],"number":[33,198],"classes.":[36,56],"We":[37,110,135],"investigate":[38,136],"AudioSet,":[42],"which":[43],"dataset":[46],"consisting":[47],"over":[49],"2":[50],"million":[51],"clips":[53],"and":[54,79,118,124,127,145,172,202],"527":[55],"AudioSet":[57,149],"weakly":[59,87],"labelled,":[60],"that":[62,151,184],"only":[63],"known":[72],"for":[73,132],"each":[74,208],"clip,":[75],"whereas":[76],"onset":[78],"offset":[80],"times":[81],"are":[82],"unknown.":[83],"To":[84],"address":[85],"labelled":[88],"problem,":[91],"we":[92,182],"propose":[93,125],"attention":[94,115,129,137,154],"neural":[95,116,130,138,155,175],"networks":[96,117,131,139],"as":[97],"way":[99],"attend":[101],"most":[103],"salient":[104],"parts":[105],"bridge":[111],"connection":[113],"between":[114],"multiple":[119],"instance":[120],"learning":[121],"(MIL)":[122],"methods,":[123],"decision-level":[126],"feature-level":[128,153],"tagging.":[134],"modeled":[140],"by":[141],"different":[142],"functions,":[143],"depths,":[144],"widths.":[146],"Experiments":[147],"show":[150],"network":[156,176],"achieves":[157],"state-of-the-art":[159],"mean":[160],"average":[161],"precision":[162],"0.369,":[164],"outperforming":[165],"best":[167],"MIL":[168],"method":[169],"0.317":[171],"Google's":[173],"deep":[174],"baseline":[177],"0.314.":[179],"In":[180],"addition,":[181],"discover":[183],"performance":[188],"AudioSet-embedding":[190],"features":[191],"has":[192],"weak":[194],"correlation":[195],"with":[196],"training":[200],"samples":[201],"quality":[204],"labels":[206],"class.":[210]},"counts_by_year":[{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":21},{"year":2021,"cited_by_count":19},{"year":2020,"cited_by_count":20},{"year":2019,"cited_by_count":6}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
