{"id":"https://openalex.org/W3081424945","doi":"https://doi.org/10.1109/access.2020.3019084","title":"nnAudio: An on-the-Fly GPU Audio to Spectrogram Conversion Toolbox Using 1D Convolutional Neural Networks","display_name":"nnAudio: An on-the-Fly GPU Audio to Spectrogram Conversion Toolbox Using 1D Convolutional Neural Networks","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3081424945","doi":"https://doi.org/10.1109/access.2020.3019084","mag":"3081424945"},"language":"en","primary_location":{"id":"doi:10.1109/access.2020.3019084","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.3019084","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/09174990.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/09174990.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069612434","display_name":"Kin Wai Cheuk","orcid":"https://orcid.org/0000-0003-3213-8242"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]},{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Kin Wai Cheuk","raw_affiliation_strings":["Information Systems, Technology and Design, Singapore University of Technology and Design","Institute of High Performance Computing, Agency for Science, Technology and Research"],"affiliations":[{"raw_affiliation_string":"Information Systems, Technology and Design, Singapore University of Technology and Design","institution_ids":["https://openalex.org/I152815399"]},{"raw_affiliation_string":"Institute of High Performance Computing, Agency for Science, Technology and Research","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063855877","display_name":"Hans Anderson","orcid":"https://orcid.org/0000-0001-8230-7724"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hans Anderson","raw_affiliation_strings":["Blue Mangoo Software, Hung Yen, Vietnam"],"affiliations":[{"raw_affiliation_string":"Blue Mangoo Software, Hung Yen, Vietnam","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078786239","display_name":"Kat Agres","orcid":"https://orcid.org/0000-0001-7260-2447"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]},{"id":"https://openalex.org/I3004594783","display_name":"Institute of High Performance Computing","ror":"https://ror.org/02n0ejh50","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3004594783","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Kat Agres","raw_affiliation_strings":["Institute of High Performance Computing, Agency for Science, Technology and Research","Yong Siew Toh Conservatory of Music, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute of High Performance Computing, Agency for Science, Technology and Research","institution_ids":["https://openalex.org/I3004594783","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Yong Siew Toh Conservatory of Music, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069548004","display_name":"Dorien Herremans","orcid":"https://orcid.org/0000-0001-8607-1640"},"institutions":[{"id":"https://openalex.org/I152815399","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87","country_code":"SG","type":"education","lineage":["https://openalex.org/I152815399"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Dorien Herremans","raw_affiliation_strings":["Information Systems, Technology and Design, Singapore University of Technology and Design"],"affiliations":[{"raw_affiliation_string":"Information Systems, Technology and Design, Singapore University of Technology and Design","institution_ids":["https://openalex.org/I152815399"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5069612434"],"corresponding_institution_ids":["https://openalex.org/I115228651","https://openalex.org/I152815399","https://openalex.org/I3004594783"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":6.8778,"has_fulltext":true,"cited_by_count":74,"citation_normalized_percentile":{"value":0.97814885,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"8","issue":null,"first_page":"161981","last_page":"162003"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.950714111328125},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8186386823654175},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6418924927711487},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.47333699464797974},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.457142174243927},{"id":"https://openalex.org/keywords/short-time-fourier-transform","display_name":"Short-time Fourier transform","score":0.4533124268054962},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.42367318272590637},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4116133749485016},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.400025874376297},{"id":"https://openalex.org/keywords/fourier-transform","display_name":"Fourier transform","score":0.3809739351272583},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.29401084780693054},{"id":"https://openalex.org/keywords/fourier-analysis","display_name":"Fourier analysis","score":0.12215465307235718},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08100172877311707}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.950714111328125},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8186386823654175},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6418924927711487},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47333699464797974},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.457142174243927},{"id":"https://openalex.org/C166386157","wikidata":"https://www.wikidata.org/wiki/Q1477735","display_name":"Short-time Fourier transform","level":4,"score":0.4533124268054962},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.42367318272590637},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4116133749485016},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.400025874376297},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.3809739351272583},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.29401084780693054},{"id":"https://openalex.org/C203024314","wikidata":"https://www.wikidata.org/wiki/Q1365258","display_name":"Fourier analysis","level":3,"score":0.12215465307235718},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08100172877311707},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/access.2020.3019084","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.3019084","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/09174990.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:scholarbank.nus.edu.sg:10635/198955","is_oa":false,"landing_page_url":"https://scholarbank.nus.edu.sg/handle/10635/198955","pdf_url":null,"source":{"id":"https://openalex.org/S7407052290","display_name":"National University of Singapore","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Scopus OA2020","raw_type":"Article"},{"id":"pmh:oai:doaj.org/article:fc754591f8114deaa9530e307f062e37","is_oa":true,"landing_page_url":"https://doaj.org/article/fc754591f8114deaa9530e307f062e37","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 8, Pp 161981-162003 (2020)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2020.3019084","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.3019084","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/09174990.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2884910486","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320322724","funder_display_name":"Ministry of Education, India"},{"id":"https://openalex.org/G5540169971","display_name":null,"funder_award_id":"SRG ISTD 2017 129","funder_id":"https://openalex.org/F4320324110","funder_display_name":"Singapore University of Technology and Design"},{"id":"https://openalex.org/G6205196381","display_name":null,"funder_award_id":"SING-2018-02-0204","funder_id":"https://openalex.org/F4320320696","funder_display_name":"Agency for Science, Technology and Research"},{"id":"https://openalex.org/G7023510852","display_name":null,"funder_award_id":"IDG31800103","funder_id":"https://openalex.org/F4320324110","funder_display_name":"Singapore University of Technology and Design"},{"id":"https://openalex.org/G8307628981","display_name":null,"funder_award_id":"SINGA","funder_id":"https://openalex.org/F4320320696","funder_display_name":"Agency for Science, Technology and Research"}],"funders":[{"id":"https://openalex.org/F4320309369","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44"},{"id":"https://openalex.org/F4320320696","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09"},{"id":"https://openalex.org/F4320322724","display_name":"Ministry of Education, India","ror":"https://ror.org/048xjjh50"},{"id":"https://openalex.org/F4320324110","display_name":"Singapore University of Technology and Design","ror":"https://ror.org/05j6fvn87"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3081424945.pdf","grobid_xml":"https://content.openalex.org/works/W3081424945.grobid-xml"},"referenced_works_count":66,"referenced_works":["https://openalex.org/W608300865","https://openalex.org/W1538131130","https://openalex.org/W1604034532","https://openalex.org/W1838323663","https://openalex.org/W1975461858","https://openalex.org/W2042105302","https://openalex.org/W2076608692","https://openalex.org/W2086393337","https://openalex.org/W2103387126","https://openalex.org/W2105143211","https://openalex.org/W2106271918","https://openalex.org/W2106710516","https://openalex.org/W2112565646","https://openalex.org/W2112796928","https://openalex.org/W2148154194","https://openalex.org/W2168350281","https://openalex.org/W2191779130","https://openalex.org/W2317919972","https://openalex.org/W2485688913","https://openalex.org/W2519091744","https://openalex.org/W2559688696","https://openalex.org/W2721931776","https://openalex.org/W2725868244","https://openalex.org/W2753779507","https://openalex.org/W2794150026","https://openalex.org/W2798530805","https://openalex.org/W2889191349","https://openalex.org/W2889717020","https://openalex.org/W2899771611","https://openalex.org/W2902808043","https://openalex.org/W2904843390","https://openalex.org/W2921083967","https://openalex.org/W2922288583","https://openalex.org/W2931364255","https://openalex.org/W2935162632","https://openalex.org/W2940247716","https://openalex.org/W2950335938","https://openalex.org/W2950733131","https://openalex.org/W2954224973","https://openalex.org/W2963045359","https://openalex.org/W2963985474","https://openalex.org/W2971591236","https://openalex.org/W2986842658","https://openalex.org/W2991402284","https://openalex.org/W3007068036","https://openalex.org/W3008587939","https://openalex.org/W3016243847","https://openalex.org/W3091273176","https://openalex.org/W4232336823","https://openalex.org/W4287813281","https://openalex.org/W4297791000","https://openalex.org/W6601150824","https://openalex.org/W6603616073","https://openalex.org/W6632100814","https://openalex.org/W6730401039","https://openalex.org/W6733936739","https://openalex.org/W6740333300","https://openalex.org/W6740352302","https://openalex.org/W6756040250","https://openalex.org/W6756545897","https://openalex.org/W6764992649","https://openalex.org/W6765174816","https://openalex.org/W6767470613","https://openalex.org/W6769896225","https://openalex.org/W6773722262","https://openalex.org/W6776981032"],"related_works":["https://openalex.org/W2120540196","https://openalex.org/W3095343173","https://openalex.org/W2381036744","https://openalex.org/W2288135719","https://openalex.org/W2323749021","https://openalex.org/W2533590149","https://openalex.org/W2901989338","https://openalex.org/W200102888","https://openalex.org/W82005754","https://openalex.org/W2334448276"],"abstract_inverted_index":{"In":[0],"this":[1,54],"paper,":[2],"we":[3],"present":[4],"nnAudio,":[5,104,231],"a":[6,174],"new":[7],"neural":[8,23,84,116],"network-based":[9],"audio":[10,187,200],"processing":[11,15,145,247,257],"framework":[12,151,161,242],"with":[13,98,156,214],"graphics":[14],"unit":[16,146],"(GPU)":[17],"support":[18],"that":[19,82,185],"leverages":[20,106],"1D":[21,114],"convolutional":[22,115],"networks":[24],"to":[25,29,39,46,100,179,197,209],"perform":[26],"time":[27,97,167],"domain":[28,31],"frequency":[30],"conversion.":[32],"It":[33],"allows":[34,57],"on-the-fly":[35],"spectrogram":[36,90,165,212],"extraction":[37,166],"due":[38],"its":[40,120],"fast":[41],"speed,":[42],"without":[43],"the":[44,51,60,66,75,79,83,101,107,143,164,169,180,186,191,218],"need":[45],"store":[47],"any":[48],"spectrograms":[49],"on":[50,59,152],"disk.":[52],"Moreover,":[53],"approach":[55],"also":[56,243],"back-propagation":[58],"waveforms-to-spectrograms":[61],"transformation":[62,67,77],"layer,":[63],"and":[64,127,159,252],"hence,":[65],"process":[68],"can":[69],"be":[70],"made":[71],"trainable,":[72],"further":[73],"optimizing":[74],"waveform-to-spectrogram":[76],"for":[78,230],"specific":[80],"task":[81],"network":[85,117],"is":[86,228,233],"trained":[87],"on.":[88],"All":[89],"implementations":[91,131,140],"scale":[92],"as":[93,250],"Big-O":[94],"of":[95,113,135,171,182,190,204,225,256],"linear":[96],"respect":[99],"input":[102,199],"length.":[103,193],"however,":[105],"compute":[108],"unified":[109],"device":[110],"architecture":[111],"(CUDA)":[112],"from":[118,168,217],"PyTorch,":[119],"short-time":[121],"Fourier":[122],"transform":[123,129],"(STFT),":[124],"Mel":[125],"spectrogram,":[126],"constant-Q":[128],"(CQT)":[130],"are":[132,189,207],"an":[133,202],"order":[134,170,181],"magnitude":[136],"faster":[137,237],"than":[138,238],"other":[139],"using":[141,221],"only":[142],"central":[144],"(CPU).":[147],"We":[148],"tested":[149],"our":[150,160],"three":[153],"different":[154,215],"machines":[155],"NVIDIA":[157],"GPUs,":[158],"significantly":[162],"reduces":[163],"seconds":[172],"(using":[173],"popular":[175],"python":[176],"library":[177],"librosa)":[178],"milliseconds,":[183],"given":[184],"recordings":[188],"same":[192],"When":[194],"applying":[195],"nnAudio":[196],"variable":[198],"lengths,":[201],"average":[203,224],"11.5":[205],"hours":[206,227],"required":[208,229],"extract":[210],"34":[211],"types":[213],"parameters":[216],"MusicNet":[219],"dataset":[220],"librosa.":[222,239],"An":[223],"2.8":[226],"which":[232],"still":[234],"four":[235],"times":[236],"Our":[240],"proposed":[241],"outperforms":[244],"existing":[245],"GPU":[246],"libraries":[248],"such":[249],"Kapre":[251],"torchaudio":[253],"in":[254],"terms":[255],"speed.":[258]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":18},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
