{"id":"https://openalex.org/W2654517624","doi":"https://doi.org/10.1109/icassp.2017.7952264","title":"Weakly-supervised audio event detection using event-specific Gaussian filters and fully convolutional networks","display_name":"Weakly-supervised audio event detection using event-specific Gaussian filters and fully convolutional networks","publication_year":2017,"publication_date":"2017-03-01","ids":{"openalex":"https://openalex.org/W2654517624","doi":"https://doi.org/10.1109/icassp.2017.7952264","mag":"2654517624"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2017.7952264","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2017.7952264","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102024410","display_name":"Ting-Wei Su","orcid":"https://orcid.org/0000-0002-1389-2082"},"institutions":[{"id":"https://openalex.org/I4210086894","display_name":"Research Center for Information Technology Innovation, Academia Sinica","ror":"https://ror.org/000zgvm20","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086894","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Ting-Wei Su","raw_affiliation_strings":["Research Center for Information Technology Innovation, Academia Sinica, Taiwan"],"affiliations":[{"raw_affiliation_string":"Research Center for Information Technology Innovation, Academia Sinica, Taiwan","institution_ids":["https://openalex.org/I4210086894"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101719938","display_name":"Jen-Yu Liu","orcid":"https://orcid.org/0000-0003-1299-6688"},"institutions":[{"id":"https://openalex.org/I4210086894","display_name":"Research Center for Information Technology Innovation, Academia Sinica","ror":"https://ror.org/000zgvm20","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086894","https://openalex.org/I84653119"]},{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Jen-Yu Liu","raw_affiliation_strings":["Department of Electrical Engineering, National Taiwan University, Taiwan","Research Center for Information Technology Innovation, Academia Sinica, Taiwan"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, National Taiwan University, Taiwan","institution_ids":["https://openalex.org/I16733864"]},{"raw_affiliation_string":"Research Center for Information Technology Innovation, Academia Sinica, Taiwan","institution_ids":["https://openalex.org/I4210086894"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061291906","display_name":"Yi\u2010Hsuan Yang","orcid":"https://orcid.org/0000-0002-2724-6161"},"institutions":[{"id":"https://openalex.org/I4210086894","display_name":"Research Center for Information Technology Innovation, Academia Sinica","ror":"https://ror.org/000zgvm20","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086894","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yi-Hsuan Yang","raw_affiliation_strings":["Research Center for Information Technology Innovation, Academia Sinica, Taiwan"],"affiliations":[{"raw_affiliation_string":"Research Center for Information Technology Innovation, Academia Sinica, Taiwan","institution_ids":["https://openalex.org/I4210086894"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102024410"],"corresponding_institution_ids":["https://openalex.org/I4210086894"],"apc_list":null,"apc_paid":null,"fwci":7.212,"has_fulltext":false,"cited_by_count":54,"citation_normalized_percentile":{"value":0.97819637,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"791","last_page":"795"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8136473894119263},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.7180216312408447},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6574785113334656},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5679012537002563},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5570310354232788},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5243728160858154},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.437517911195755},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4262765944004059},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.4189490079879761},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3286411762237549},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.12225142121315002}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8136473894119263},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.7180216312408447},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6574785113334656},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5679012537002563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5570310354232788},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5243728160858154},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.437517911195755},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4262765944004059},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.4189490079879761},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3286411762237549},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.12225142121315002},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2017.7952264","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2017.7952264","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1496704041","https://openalex.org/W1606347560","https://openalex.org/W1903029394","https://openalex.org/W1972567154","https://openalex.org/W1981087263","https://openalex.org/W1994488211","https://openalex.org/W2038484192","https://openalex.org/W2043194666","https://openalex.org/W2059652044","https://openalex.org/W2146502635","https://openalex.org/W2155653793","https://openalex.org/W2163605009","https://openalex.org/W2292996718","https://openalex.org/W2354870669","https://openalex.org/W2398932003","https://openalex.org/W2402964572","https://openalex.org/W2405569690","https://openalex.org/W2525577390","https://openalex.org/W3103314642","https://openalex.org/W6636358008","https://openalex.org/W6681435938","https://openalex.org/W6684191040","https://openalex.org/W6696777030"],"related_works":["https://openalex.org/W4293226380","https://openalex.org/W4321487865","https://openalex.org/W4313906399","https://openalex.org/W2590798552","https://openalex.org/W2811106690","https://openalex.org/W4239306820","https://openalex.org/W2947043951","https://openalex.org/W2318112981","https://openalex.org/W4312417841","https://openalex.org/W4210874298"],"abstract_inverted_index":{"Audio":[0],"event":[1],"detection":[2],"aims":[3],"at":[4],"discovering":[5],"the":[6,16,19,27,73,78,115],"elements":[7],"inside":[8],"an":[9,93],"audio":[10,20],"clip.":[11],"In":[12],"addition":[13],"to":[14,24,87,100,110],"labeling":[15],"clips":[17],"with":[18,72,126],"events,":[21],"we":[22,43],"want":[23],"find":[25],"out":[26],"temporal":[28,116],"locations":[29],"of":[30,82,118],"these":[31],"events.":[32],"However,":[33],"creating":[34],"clearly":[35],"annotated":[36],"training":[37],"data":[38,57,61],"can":[39,62],"be":[40,63],"time-consuming.":[41],"Therefore,":[42],"provide":[44],"a":[45,88],"model":[46,84,107],"based":[47],"on":[48,55],"convolutional":[49,90],"neural":[50],"networks":[51],"that":[52],"relies":[53],"only":[54],"weakly-supervised":[56],"for":[58],"training.":[59],"These":[60],"directly":[64],"obtained":[65],"from":[66],"online":[67],"platforms,":[68],"such":[69],"as":[70],"Freesound,":[71],"clip-level":[74,127],"labels":[75],"assigned":[76],"by":[77],"uploaders.":[79],"The":[80],"structure":[81],"our":[83],"is":[85,98,108,123],"extended":[86],"fully":[89],"networks,":[91],"and":[92],"event-specific":[94],"Gaussian":[95],"filter":[96],"layer":[97],"designed":[99],"advance":[101],"its":[102],"learning":[103],"ability.":[104],"Besides,":[105],"this":[106],"able":[109],"detect":[111],"frame-level":[112],"information,":[113],"e.g.,":[114],"position":[117],"sounds,":[119],"even":[120],"when":[121],"it":[122],"trained":[124],"merely":[125],"labels.":[128]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":17},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
