{"id":"https://openalex.org/W2940335648","doi":"https://doi.org/10.1109/icassp.2019.8682378","title":"Timescalenet : A Multiresolution Approach for Raw Audio Recognition","display_name":"Timescalenet : A Multiresolution Approach for Raw Audio Recognition","publication_year":2019,"publication_date":"2019-04-17","ids":{"openalex":"https://openalex.org/W2940335648","doi":"https://doi.org/10.1109/icassp.2019.8682378","mag":"2940335648"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2019.8682378","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682378","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006258841","display_name":"\u00c9ric Bavu","orcid":"https://orcid.org/0000-0001-6395-634X"},"institutions":[{"id":"https://openalex.org/I124158823","display_name":"Conservatoire National des Arts et M\u00e9tiers","ror":"https://ror.org/0175hh227","country_code":"FR","type":"education","lineage":["https://openalex.org/I124158823","https://openalex.org/I4210134562"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Eric Bavu","raw_affiliation_strings":["Laboratoire de M\u00e9canique des Structures et des Syst\u00e8mes Coupl\u00e9s (LMSSC), Conservatoire national des arts et m\u00e9tiers (Cnam), Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Laboratoire de M\u00e9canique des Structures et des Syst\u00e8mes Coupl\u00e9s (LMSSC), Conservatoire national des arts et m\u00e9tiers (Cnam), Paris, France","institution_ids":["https://openalex.org/I124158823"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040694178","display_name":"Aro Ramamonjy","orcid":null},"institutions":[{"id":"https://openalex.org/I124158823","display_name":"Conservatoire National des Arts et M\u00e9tiers","ror":"https://ror.org/0175hh227","country_code":"FR","type":"education","lineage":["https://openalex.org/I124158823","https://openalex.org/I4210134562"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Aro Ramamonjy","raw_affiliation_strings":["Laboratoire de M\u00e9canique des Structures et des Syst\u00e8mes Coupl\u00e9s (LMSSC), Conservatoire national des arts et m\u00e9tiers (Cnam), Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Laboratoire de M\u00e9canique des Structures et des Syst\u00e8mes Coupl\u00e9s (LMSSC), Conservatoire national des arts et m\u00e9tiers (Cnam), Paris, France","institution_ids":["https://openalex.org/I124158823"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032227336","display_name":"Hadrien Pujol","orcid":null},"institutions":[{"id":"https://openalex.org/I124158823","display_name":"Conservatoire National des Arts et M\u00e9tiers","ror":"https://ror.org/0175hh227","country_code":"FR","type":"education","lineage":["https://openalex.org/I124158823","https://openalex.org/I4210134562"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Hadrien Pujol","raw_affiliation_strings":["Laboratoire de M\u00e9canique des Structures et des Syst\u00e8mes Coupl\u00e9s (LMSSC), Conservatoire national des arts et m\u00e9tiers (Cnam), Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Laboratoire de M\u00e9canique des Structures et des Syst\u00e8mes Coupl\u00e9s (LMSSC), Conservatoire national des arts et m\u00e9tiers (Cnam), Paris, France","institution_ids":["https://openalex.org/I124158823"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057486415","display_name":"Alexandre Garcia","orcid":"https://orcid.org/0000-0002-8479-4199"},"institutions":[{"id":"https://openalex.org/I124158823","display_name":"Conservatoire National des Arts et M\u00e9tiers","ror":"https://ror.org/0175hh227","country_code":"FR","type":"education","lineage":["https://openalex.org/I124158823","https://openalex.org/I4210134562"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Alexandre Garcia","raw_affiliation_strings":["Laboratoire de M\u00e9canique des Structures et des Syst\u00e8mes Coupl\u00e9s (LMSSC), Conservatoire national des arts et m\u00e9tiers (Cnam), Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Laboratoire de M\u00e9canique des Structures et des Syst\u00e8mes Coupl\u00e9s (LMSSC), Conservatoire national des arts et m\u00e9tiers (Cnam), Paris, France","institution_ids":["https://openalex.org/I124158823"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1669,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.42211942,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7639915943145752},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6055783033370972},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5784981846809387},{"id":"https://openalex.org/keywords/filter-bank","display_name":"Filter bank","score":0.5629401803016663},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.537107527256012},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.505500078201294},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4916597008705139},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4343150854110718},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.431901216506958},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.4238879084587097},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.3932100534439087},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.2012469470500946},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08215400576591492}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7639915943145752},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6055783033370972},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5784981846809387},{"id":"https://openalex.org/C100515483","wikidata":"https://www.wikidata.org/wiki/Q3268235","display_name":"Filter bank","level":3,"score":0.5629401803016663},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.537107527256012},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.505500078201294},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4916597008705139},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4343150854110718},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.431901216506958},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.4238879084587097},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3932100534439087},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2012469470500946},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08215400576591492},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2019.8682378","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2019.8682378","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W396690109","https://openalex.org/W608300865","https://openalex.org/W1522301498","https://openalex.org/W1542280630","https://openalex.org/W1677182931","https://openalex.org/W2059652044","https://openalex.org/W2076105648","https://openalex.org/W2097772361","https://openalex.org/W2343218625","https://openalex.org/W2398826216","https://openalex.org/W2408093180","https://openalex.org/W2519091744","https://openalex.org/W2529337537","https://openalex.org/W2589857635","https://openalex.org/W2625477277","https://openalex.org/W2769912137","https://openalex.org/W2794150026","https://openalex.org/W2797583228","https://openalex.org/W2949382160","https://openalex.org/W2953333557","https://openalex.org/W2963103134","https://openalex.org/W2963454111","https://openalex.org/W2964121744","https://openalex.org/W2964187693","https://openalex.org/W6613520308","https://openalex.org/W6631190155","https://openalex.org/W6712560600","https://openalex.org/W6739879593","https://openalex.org/W6746451879","https://openalex.org/W6750665317","https://openalex.org/W6765069029","https://openalex.org/W6780226713"],"related_works":["https://openalex.org/W4293226380","https://openalex.org/W2021595906","https://openalex.org/W2766087987","https://openalex.org/W1606020025","https://openalex.org/W4321487865","https://openalex.org/W4313906399","https://openalex.org/W2095992409","https://openalex.org/W106304674","https://openalex.org/W2811106690","https://openalex.org/W2357857030"],"abstract_inverted_index":{"In":[0,36,154],"recent":[1],"years,":[2],"the":[3,15,37,42,65,70,74,100,141,145,155,162],"use":[4],"of":[5,44,56,83,128,136,173],"Deep":[6],"Learning":[7],"techniques":[8],"in":[9,148],"audio":[10],"signal":[11],"processing":[12],"has":[13],"led":[14],"scientific":[16],"community":[17],"to":[18,25,99,139],"develop":[19],"machine":[20,33],"learning":[21,52,60],"strategies":[22],"that":[23,87],"allow":[24],"build":[26],"efficient":[27,54],"representations":[28],"from":[29],"raw":[30],"waveforms":[31],"for":[32,184],"hearing":[34],"tasks.":[35],"present":[38,156],"paper,":[39,157],"we":[40],"show":[41],"benefit":[43],"a":[45,57,80,90,107,125,168],"multi-resolution":[46],"approach":[47],":":[48,180],"TimeScaleNet":[49,158],"aims":[50],"at":[51,64,69,144],"an":[53],"representation":[55],"sound,":[58],"by":[59],"time":[61,142],"dependencies":[62],"both":[63],"sample":[66,75],"level":[67],"and":[68,97,104,111],"frame":[71,146],"level.":[72],"At":[73],"level,":[76],"TimeScaleNet's":[77],"architecture":[78],"introduces":[79],"new":[81],"form":[82],"recurrent":[84],"neural":[85],"layer":[86],"acts":[88],"as":[89],"learnable":[91,114],"passband":[92],"biquadratic":[93],"digital":[94],"IIR":[95],"filterbank":[96],"self-adapts":[98],"specific":[101],"recognition":[102],"task":[103],"dataset,":[105],"with":[106],"large":[108],"receptive":[109],"field":[110],"very":[112,169],"few":[113],"parameters.":[115],"The":[116],"obtained":[117],"frame-level":[118],"feature":[119],"map":[120],"is":[121,159],"then":[122],"processed":[123],"using":[124,161],"residual":[126],"network":[127],"depthwise":[129],"separable":[130],"atrous":[131],"convolutions.":[132],"This":[133],"second":[134],"scale":[135],"analysis":[137],"allows":[138],"encode":[140],"fluctuations":[143],"timescale,":[147],"different":[149],"learnt":[150],"pooled":[151],"frequency":[152],"bands.":[153],"tested":[160],"Speech":[163],"Commands":[164],"Dataset.":[165],"We":[166],"report":[167],"high":[170],"mean":[171],"accuracy":[172],"94.87":[174],"\u00b1":[175,182],"0.24%":[176],"(macro":[177],"averaged":[178],"F1-score":[179],"94.9":[181],"0.24%)":[183],"this":[185],"particular":[186],"task.":[187]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
