{"id":"https://openalex.org/W2964218314","doi":"https://doi.org/10.1109/icassp.2018.8462046","title":"Sample-Level CNN Architectures for Music Auto-Tagging Using Raw Waveforms","display_name":"Sample-Level CNN Architectures for Music Auto-Tagging Using Raw Waveforms","publication_year":2018,"publication_date":"2018-04-01","ids":{"openalex":"https://openalex.org/W2964218314","doi":"https://doi.org/10.1109/icassp.2018.8462046","mag":"2964218314"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2018.8462046","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462046","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101942787","display_name":"Tae\u2010Jun Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Taejun Kim","raw_affiliation_strings":["School of Electrical and Computer Engineering, University of Seoul"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Computer Engineering, University of Seoul","institution_ids":["https://openalex.org/I124633538"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049268304","display_name":"Jongpil Lee","orcid":"https://orcid.org/0000-0002-1126-0081"},"institutions":[{"id":"https://openalex.org/I4210099236","display_name":"Kootenay Association for Science & Technology","ror":"https://ror.org/011pv9p44","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210099236"]},{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["CA","KR"],"is_corresponding":false,"raw_author_name":"Jongpil Lee","raw_affiliation_strings":["Graduate School of Culture Technology, KAIST"],"affiliations":[{"raw_affiliation_string":"Graduate School of Culture Technology, KAIST","institution_ids":["https://openalex.org/I4210099236","https://openalex.org/I157485424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056437111","display_name":"Juhan Nam","orcid":"https://orcid.org/0000-0003-2664-2119"},"institutions":[{"id":"https://openalex.org/I4210099236","display_name":"Kootenay Association for Science & Technology","ror":"https://ror.org/011pv9p44","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210099236"]},{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]}],"countries":["CA","KR"],"is_corresponding":false,"raw_author_name":"Juhan Nam","raw_affiliation_strings":["Graduate School of Culture Technology, KAIST"],"affiliations":[{"raw_affiliation_string":"Graduate School of Culture Technology, KAIST","institution_ids":["https://openalex.org/I4210099236","https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101942787"],"corresponding_institution_ids":["https://openalex.org/I124633538"],"apc_list":null,"apc_paid":null,"fwci":8.092,"has_fulltext":false,"cited_by_count":91,"citation_normalized_percentile":{"value":0.98302725,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"366","last_page":"370"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8570363521575928},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.8290598392486572},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.7128168344497681},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.6571425199508667},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5613996386528015},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5608707070350647},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5294435024261475},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.46317794919013977},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.44749000668525696},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4468982517719269},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.44203898310661316},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.418325811624527},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3009871244430542}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8570363521575928},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.8290598392486572},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.7128168344497681},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.6571425199508667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5613996386528015},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5608707070350647},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5294435024261475},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.46317794919013977},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.44749000668525696},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4468982517719269},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.44203898310661316},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.418325811624527},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3009871244430542},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2018.8462046","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462046","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.8199999928474426,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W1998808035","https://openalex.org/W2059652044","https://openalex.org/W2127870748","https://openalex.org/W2194775991","https://openalex.org/W2242773987","https://openalex.org/W2293255527","https://openalex.org/W2302255633","https://openalex.org/W2414894569","https://openalex.org/W2516282932","https://openalex.org/W2526050071","https://openalex.org/W2529337537","https://openalex.org/W2592168896","https://openalex.org/W2593179621","https://openalex.org/W2604509013","https://openalex.org/W2752782242","https://openalex.org/W2963451564","https://openalex.org/W2964137095","https://openalex.org/W3027908062","https://openalex.org/W3105202226","https://openalex.org/W4293665662","https://openalex.org/W4294375521","https://openalex.org/W6637373629","https://openalex.org/W6649863312","https://openalex.org/W6678969435","https://openalex.org/W6682778277","https://openalex.org/W6689262250","https://openalex.org/W6690394000","https://openalex.org/W6698183232","https://openalex.org/W6713132643","https://openalex.org/W6715395060","https://openalex.org/W6728126488","https://openalex.org/W6729831399","https://openalex.org/W6734751391"],"related_works":["https://openalex.org/W1974895211","https://openalex.org/W2176409448","https://openalex.org/W4226493464","https://openalex.org/W4312417841","https://openalex.org/W3193565141","https://openalex.org/W3133861977","https://openalex.org/W3167935049","https://openalex.org/W2964954556","https://openalex.org/W3029198973","https://openalex.org/W3019910406"],"abstract_inverted_index":{"Recent":[0],"work":[1],"has":[2],"shown":[3],"that":[4,82],"the":[5,25,42,73,92,112],"end-to-end":[6],"approach":[7,26],"using":[8,32],"convolutional":[9],"neural":[10],"network":[11],"(CNN)":[12],"is":[13],"effective":[14],"in":[15,75],"various":[16],"types":[17],"of":[18,72],"machine":[19],"learning":[20],"tasks.":[21],"For":[22],"audio":[23],"signals,":[24],"takes":[27],"raw":[28],"waveforms":[29],"as":[30],"input":[31],"an":[33],"1-D":[34,43,113],"convolution":[35],"layer.":[36],"In":[37],"this":[38],"paper,":[39],"we":[40,103],"improve":[41],"CNN":[44,77,114],"architecture":[45],"for":[46],"music":[47],"auto-tagging":[48],"by":[49],"adopting":[50],"building":[51,76],"blocks":[52],"from":[53],"state-of-the-art":[54,89],"image":[55],"classification":[56],"models,":[57],"ResNets":[58],"and":[59,61,95,105],"SENets,":[60],"adding":[62],"multi-level":[63],"feature":[64],"aggregation":[65],"to":[66,109],"it.":[67],"We":[68],"compare":[69],"different":[70],"combinations":[71],"modules":[74],"architectures.":[78],"The":[79],"results":[80,97],"show":[81,110],"they":[83],"achieve":[84],"significant":[85],"improvements":[86],"over":[87],"previous":[88],"models":[90],"on":[91,98],"MagnaTagATune":[93],"dataset":[94],"comparable":[96],"Million":[99],"Song":[100],"Dataset.":[101],"Furthermore,":[102],"analyze":[104],"visualize":[106],"our":[107],"model":[108],"how":[111],"operates.":[115]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":18},{"year":2021,"cited_by_count":15},{"year":2020,"cited_by_count":14},{"year":2019,"cited_by_count":15},{"year":2018,"cited_by_count":4},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
