{"id":"https://openalex.org/W2294139527","doi":"https://doi.org/10.21437/interspeech.2014-225","title":"Deep scattering spectra with deep neural networks for LVCSR tasks","display_name":"Deep scattering spectra with deep neural networks for LVCSR tasks","publication_year":2014,"publication_date":"2014-09-14","ids":{"openalex":"https://openalex.org/W2294139527","doi":"https://doi.org/10.21437/interspeech.2014-225","mag":"2294139527"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2014-225","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2014-225","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2014","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tara N. Sainath","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081105332","display_name":"Vijayaditya Peddinti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vijayaditya Peddinti","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003725957","display_name":"Brian Kingsbury","orcid":"https://orcid.org/0000-0002-1343-6837"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brian Kingsbury","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006171209","display_name":"Petr Fousek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Petr Fousek","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071715737","display_name":"Bhuvana Ramabhadran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhuvana Ramabhadran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5007586881","display_name":"D. Nahamoo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David Nahamoo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5070513394"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.2721,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.93224453,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"900","last_page":"904"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8596791625022888},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6563007831573486},{"id":"https://openalex.org/keywords/timit","display_name":"TIMIT","score":0.6283842325210571},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5247885584831238},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5018231868743896},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.49386507272720337},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.4581751525402069},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.4574304521083832},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.44894811511039734},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.438793420791626},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4146541655063629},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4142901301383972},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.2672802209854126}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8596791625022888},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6563007831573486},{"id":"https://openalex.org/C2778724510","wikidata":"https://www.wikidata.org/wiki/Q7670405","display_name":"TIMIT","level":3,"score":0.6283842325210571},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5247885584831238},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5018231868743896},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.49386507272720337},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4581751525402069},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.4574304521083832},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.44894811511039734},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.438793420791626},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4146541655063629},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4142901301383972},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.2672802209854126},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2014-225","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2014-225","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2014","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W185597827","https://openalex.org/W1534876456","https://openalex.org/W1576579425","https://openalex.org/W1627087495","https://openalex.org/W2001619934","https://openalex.org/W2002342963","https://openalex.org/W2028706510","https://openalex.org/W2072072671","https://openalex.org/W2079623482","https://openalex.org/W2090861223","https://openalex.org/W2093231248","https://openalex.org/W2106119541","https://openalex.org/W2112739286","https://openalex.org/W2114016253","https://openalex.org/W2118595744","https://openalex.org/W2130426352","https://openalex.org/W2137075158","https://openalex.org/W2147676440","https://openalex.org/W2148154194","https://openalex.org/W2154833897","https://openalex.org/W2962719052"],"related_works":["https://openalex.org/W80423236","https://openalex.org/W3164669818","https://openalex.org/W1573546415","https://openalex.org/W2906993205","https://openalex.org/W2071572975","https://openalex.org/W2619993508","https://openalex.org/W2154037907","https://openalex.org/W2808291730","https://openalex.org/W2381525442","https://openalex.org/W2592921646"],"abstract_inverted_index":{"Log-mel":[0],"filterbank":[1],"features,":[2,123],"which":[3,129],"are":[4,141],"commonly":[5],"used":[6],"features":[7,37,57,112,140],"for":[8,44,58,72],"CNNs,":[9],"can":[10,151],"remove":[11],"higher-resolution":[12],"information":[13],"from":[14],"the":[15,53,64,81,84,94,110],"speech":[16],"signal.":[17],"A":[18],"novel":[19],"technique,":[20],"known":[21],"as":[22],"Deep":[23],"Scattering":[24],"Spectrum":[25],"(DSS),":[26],"addresses":[27],"this":[28,34,49,155],"issue":[29],"and":[30,46,68,101,132,148],"looks":[31],"to":[32,79,143],"preserve":[33],"information.":[35],"DSS":[36,56,85,95,111,139],"have":[38],"shown":[39],"promise":[40],"on":[41,98],"TIMIT,":[42],"both":[43],"classification":[45],"recognition.":[47],"In":[48],"paper,":[50],"we":[51,62,76,136],"extend":[52],"use":[54],"of":[55,83],"LVCSR":[59,73],"tasks.":[60,74],"First,":[61],"explore":[63,77],"optimal":[65],"multi-resolution":[66,144],"time":[67],"frequency":[69],"scattering":[70],"operations":[71],"Next,":[75],"techniques":[78,92],"reduce":[80],"dimension":[82],"features.":[86,96],"We":[87],"also":[88],"incorporate":[89],"speaker":[90],"adaptation":[91],"into":[93],"Results":[97],"a":[99,115,125],"50":[100],"430":[102],"hour":[103],"English":[104],"Broadcast":[105],"News":[106],"task":[107],"show":[108,137],"that":[109,138],"provide":[113],"between":[114],"4-7%":[116],"relative":[117],"improvement":[118],"in":[119],"WER":[120],"over":[121],"log-mel":[122,145],"within":[124],"state-of-the-art":[126],"CNN":[127],"framework":[128],"incorporates":[130],"speaker-adaptation":[131],"sequence":[133],"training.":[134],"Finally,":[135],"similar":[142,149],"+":[146],"MFCCs,":[147],"improvements":[150],"be":[152],"obtained":[153],"with":[154],"representation.":[156]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
