{"id":"https://openalex.org/W2909463944","doi":"https://doi.org/10.1109/iros.2018.8593925","title":"Multi-timescale Feature-extraction Architecture of Deep Neural Networks for Acoustic Model Training from Raw Speech Signal","display_name":"Multi-timescale Feature-extraction Architecture of Deep Neural Networks for Acoustic Model Training from Raw Speech Signal","publication_year":2018,"publication_date":"2018-10-01","ids":{"openalex":"https://openalex.org/W2909463944","doi":"https://doi.org/10.1109/iros.2018.8593925","mag":"2909463944"},"language":"en","primary_location":{"id":"doi:10.1109/iros.2018.8593925","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2018.8593925","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018403421","display_name":"Ryu Takeda","orcid":"https://orcid.org/0009-0007-0518-6245"},"institutions":[{"id":"https://openalex.org/I4210138169","display_name":"Osaka Research Institute of Industrial Science and Technology","ror":"https://ror.org/03r38cy24","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210138169"]},{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryu Takeda","raw_affiliation_strings":["The Institute of Scientific and Industrial Research, Osaka University, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Institute of Scientific and Industrial Research, Osaka University, Japan","institution_ids":["https://openalex.org/I4210138169","https://openalex.org/I98285908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091708408","display_name":"Kazuhiro Nakadai","orcid":"https://orcid.org/0000-0002-6134-4558"},"institutions":[{"id":"https://openalex.org/I1283473643","display_name":"Honda (Japan)","ror":"https://ror.org/03jzay846","country_code":"JP","type":"company","lineage":["https://openalex.org/I1283473643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazuhiro Nakadai","raw_affiliation_strings":["Honda Research Institute Japan, Co. Ltd., Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Honda Research Institute Japan, Co. Ltd., Japan","institution_ids":["https://openalex.org/I1283473643"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049614400","display_name":"Kazunori Komatani","orcid":"https://orcid.org/0000-0002-6052-600X"},"institutions":[{"id":"https://openalex.org/I4210138169","display_name":"Osaka Research Institute of Industrial Science and Technology","ror":"https://ror.org/03r38cy24","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210138169"]},{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazunori Komatani","raw_affiliation_strings":["The Institute of Scientific and Industrial Research, Osaka University, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Institute of Scientific and Industrial Research, Osaka University, Japan","institution_ids":["https://openalex.org/I4210138169","https://openalex.org/I98285908"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3379,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.7064529,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"12","issue":null,"first_page":"2503","last_page":"2510"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7989047765731812},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7231435775756836},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5922062993049622},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5906152725219727},{"id":"https://openalex.org/keywords/filter-bank","display_name":"Filter bank","score":0.5081315040588379},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48468759655952454},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.47892484068870544},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.47458434104919434},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4584573805332184},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.43963223695755005},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.42468714714050293},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.29112234711647034}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7989047765731812},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7231435775756836},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5922062993049622},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5906152725219727},{"id":"https://openalex.org/C100515483","wikidata":"https://www.wikidata.org/wiki/Q3268235","display_name":"Filter bank","level":3,"score":0.5081315040588379},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48468759655952454},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.47892484068870544},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.47458434104919434},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4584573805332184},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.43963223695755005},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42468714714050293},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.29112234711647034},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros.2018.8593925","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2018.8593925","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6800000071525574,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1524333225","https://openalex.org/W1542280630","https://openalex.org/W1582482241","https://openalex.org/W1600744878","https://openalex.org/W1666984270","https://openalex.org/W1799366690","https://openalex.org/W1969851134","https://openalex.org/W1984570643","https://openalex.org/W1998924366","https://openalex.org/W2037740282","https://openalex.org/W2079623482","https://openalex.org/W2080005694","https://openalex.org/W2081753968","https://openalex.org/W2107223151","https://openalex.org/W2112739286","https://openalex.org/W2146502635","https://openalex.org/W2147768505","https://openalex.org/W2160306971","https://openalex.org/W2160569100","https://openalex.org/W2160815625","https://openalex.org/W2218442486","https://openalex.org/W2288471100","https://openalex.org/W2293634267","https://openalex.org/W2394932179","https://openalex.org/W2396230943","https://openalex.org/W2400092632","https://openalex.org/W2402146185","https://openalex.org/W2403149086","https://openalex.org/W2408093180","https://openalex.org/W2508048623","https://openalex.org/W2512499206","https://openalex.org/W2513345070","https://openalex.org/W2592600618","https://openalex.org/W2964182776","https://openalex.org/W3034729383","https://openalex.org/W6631362777","https://openalex.org/W6637061625","https://openalex.org/W6639822467","https://openalex.org/W6681435938","https://openalex.org/W6683974394","https://openalex.org/W6696934422","https://openalex.org/W6712100661","https://openalex.org/W6712742799","https://openalex.org/W6712930963","https://openalex.org/W6779469704"],"related_works":["https://openalex.org/W2591697403","https://openalex.org/W2953716828","https://openalex.org/W2904857019","https://openalex.org/W2944728705","https://openalex.org/W3011538607","https://openalex.org/W2904022177","https://openalex.org/W2359348847","https://openalex.org/W4321441197","https://openalex.org/W4294432981","https://openalex.org/W4321276295"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"a":[3,76],"new":[4],"architecture":[5,79,119,144],"of":[6,25,47,60,67,80,84,95,101,137,152],"deep":[7],"neural":[8],"networks":[9,103],"(DNNs)":[10],"for":[11,40,111],"acoustic":[12],"models.":[13],"Training":[14],"DNNs":[15,81],"from":[16],"raw":[17,48],"speech":[18,38,49,96,153],"signals":[19,50],"will":[20],"provide":[21],"1)":[22],"novel":[23],"features":[24,140],"signals,":[26],"2)":[27],"normalization-free":[28],"processing":[29],"such":[30],"as":[31],"utterance-wise":[32],"mean":[33],"subtraction,":[34],"and":[35,92,108,149],"3)":[36],"low-latency":[37],"recognition":[39,55,71],"robot":[41],"audition.":[42],"Exploiting":[43],"the":[44,65,116,121,133,138,147],"longer":[45,61],"context":[46],"seems":[51],"useful":[52],"in":[53,64],"improving":[54],"accuracy.":[56],"However,":[57],"naive":[58],"use":[59],"contexts":[62],"results":[63],"loss":[66],"short-term":[68,93],"patterns;":[69],"thus,":[70],"accuracy":[72],"degrades.":[73],"We":[74],"propose":[75],"multi-timescale":[77,118],"feature-extraction":[78],"with":[82,129,132],"blocks":[83],"different":[85],"time":[86],"scales,":[87],"which":[88],"enable":[89],"capturing":[90],"long-":[91],"patterns":[94],"signals.":[97],"Each":[98],"block":[99],"consists":[100],"complex-valued":[102],"that":[104,115,142],"correspond":[105],"to":[106],"Fourier":[107],"filterbank":[109],"transformations":[110],"analysis.":[112],"Experiments":[113],"showed":[114],"proposed":[117],"reduced":[120],"word":[122],"error":[123],"rate":[124],"by":[125],"about":[126],"3%":[127],"compared":[128],"those":[130],"only":[131],"longterm":[134],"context.":[135],"Analysis":[136],"extracted":[139],"revealed":[141],"our":[143],"efficiently":[145],"captured":[146],"slow":[148],"fast":[150],"changes":[151],"features.":[154]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
