{"id":"https://openalex.org/W1550985492","doi":"https://doi.org/10.1109/icassp.2015.7178827","title":"Combination of two-dimensional cochleogram and spectrogram features for deep learning-based ASR","display_name":"Combination of two-dimensional cochleogram and spectrogram features for deep learning-based ASR","publication_year":2015,"publication_date":"2015-04-01","ids":{"openalex":"https://openalex.org/W1550985492","doi":"https://doi.org/10.1109/icassp.2015.7178827","mag":"1550985492"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2015.7178827","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2015.7178827","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038296765","display_name":"Andros Tjandra","orcid":"https://orcid.org/0000-0003-1246-5908"},"institutions":[{"id":"https://openalex.org/I29617571","display_name":"University of Indonesia","ror":"https://ror.org/0116zj450","country_code":"ID","type":"education","lineage":["https://openalex.org/I29617571"]},{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["ID","JP"],"is_corresponding":true,"raw_author_name":"Andros Tjandra","raw_affiliation_strings":["Faculty of Computer Science, Universitas Indonesia, Indonesia","Graduate School of Information Science, Nara Institute of Science and Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Universitas Indonesia, Indonesia","institution_ids":["https://openalex.org/I29617571"]},{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040108974","display_name":"Sakriani Sakti","orcid":"https://orcid.org/0000-0001-5509-8963"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sakriani Sakti","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068811427","display_name":"Graham Neubig","orcid":"https://orcid.org/0000-0002-2072-3789"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Graham Neubig","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078330211","display_name":"Tomoki Toda","orcid":"https://orcid.org/0000-0001-8146-1279"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tomoki Toda","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052754690","display_name":"Mirna Adriani","orcid":null},"institutions":[{"id":"https://openalex.org/I29617571","display_name":"University of Indonesia","ror":"https://ror.org/0116zj450","country_code":"ID","type":"education","lineage":["https://openalex.org/I29617571"]}],"countries":["ID"],"is_corresponding":false,"raw_author_name":"Mirna Adriani","raw_affiliation_strings":["Faculty of Computer Science, Universitas Indonesia, Indonesia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Universitas Indonesia, Indonesia","institution_ids":["https://openalex.org/I29617571"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020994673","display_name":"Satoshi Nakamura","orcid":"https://orcid.org/0000-0001-6956-3803"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Nakamura","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology, Japan","institution_ids":["https://openalex.org/I75917431"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5038296765"],"corresponding_institution_ids":["https://openalex.org/I29617571","https://openalex.org/I75917431"],"apc_list":null,"apc_paid":null,"fwci":1.7258,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.87714847,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4525","last_page":"4529"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10662","display_name":"Ultrasonics and Acoustic Wave Propagation","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.9182470440864563},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6601343154907227},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5185083150863647},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48752471804618835},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43940669298171997},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.352509081363678}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.9182470440864563},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6601343154907227},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5185083150863647},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48752471804618835},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43940669298171997},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.352509081363678}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2015.7178827","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2015.7178827","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4099999964237213,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"},{"id":"https://openalex.org/F4320335839","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W176145673","https://openalex.org/W1524333225","https://openalex.org/W1538131130","https://openalex.org/W1661756259","https://openalex.org/W1732877116","https://openalex.org/W1860981223","https://openalex.org/W1904365287","https://openalex.org/W1960653181","https://openalex.org/W1984541135","https://openalex.org/W1993882792","https://openalex.org/W1994396704","https://openalex.org/W2007645738","https://openalex.org/W2009150118","https://openalex.org/W2028706510","https://openalex.org/W2036242736","https://openalex.org/W2042141988","https://openalex.org/W2050758723","https://openalex.org/W2057498692","https://openalex.org/W2072128103","https://openalex.org/W2092216166","https://openalex.org/W2112796928","https://openalex.org/W2117671523","https://openalex.org/W2136922672","https://openalex.org/W2145094598","https://openalex.org/W2152175008","https://openalex.org/W2155273149","https://openalex.org/W2160815625","https://openalex.org/W2163605009","https://openalex.org/W2165712214","https://openalex.org/W2165880886","https://openalex.org/W2172097686","https://openalex.org/W2403239987","https://openalex.org/W2536935545","https://openalex.org/W2997574889","https://openalex.org/W4231109964","https://openalex.org/W6606992005","https://openalex.org/W6631362777","https://openalex.org/W6632100814","https://openalex.org/W6636969168","https://openalex.org/W6637402362","https://openalex.org/W6641135991","https://openalex.org/W6664918268","https://openalex.org/W6684191040","https://openalex.org/W6685168948"],"related_works":["https://openalex.org/W2731899572","https://openalex.org/W3215138031","https://openalex.org/W2897924318","https://openalex.org/W3009238340","https://openalex.org/W4321369474","https://openalex.org/W2138997758","https://openalex.org/W4360585206","https://openalex.org/W4285208911","https://openalex.org/W3082895349","https://openalex.org/W4213079790"],"abstract_inverted_index":{"This":[0],"paper":[1],"explores":[2],"the":[3,20,70,82],"use":[4],"of":[5,52,84,118],"auditory":[6],"features":[7,14,35,122,138],"based":[8],"on":[9,94],"cochleograms;":[10],"two":[11,119],"dimensional":[12,120],"speech":[13],"derived":[15],"from":[16,135,144],"gammatone":[17],"filters":[18],"within":[19,47],"convolutional":[21],"neural":[22,75,86],"network":[23,76,87],"(CNN)":[24],"framework.":[25],"Furthermore,":[26],"we":[27,45,56,67],"also":[28,68],"propose":[29],"various":[30],"possibilities":[31],"to":[32,58,127],"combine":[33,46],"cochleogram":[34],"with":[36,73],"log-mel":[37],"filter":[38],"banks":[39],"or":[40,139],"spectrogram":[41],"features.":[42,147],"In":[43],"particular,":[44],"low":[48],"and":[49,61],"high":[50],"levels":[51],"CNN":[53,136],"framework":[54,83],"which":[55],"refer":[57],"as":[59],"low-level":[60],"high-level":[62,116],"feature":[63,105],"combination.":[64],"As":[65],"comparison,":[66],"construct":[69],"similar":[71],"configuration":[72],"deep":[74],"(DNN).":[77],"Performance":[78],"was":[79,113],"evaluated":[80],"in":[81],"hybrid":[85],"-":[88],"hidden":[89],"Markov":[90],"model":[91],"(NN-HMM)":[92],"system":[93],"TIMIT":[95],"phoneme":[96,130],"sequence":[97],"recognition":[98],"task.":[99],"The":[100,110],"results":[101],"reveal":[102],"that":[103],"cochleogram-spectrogram":[104,121],"combination":[106,117],"provides":[107],"significant":[108],"advantages.":[109],"best":[111],"accuracy":[112],"obtained":[114],"by":[115],"using":[123],"CNN,":[124],"achieved":[125],"up":[126],"8.2%":[128],"relative":[129,141],"error":[131],"rate":[132],"(PER)":[133],"reduction":[134,143],"single":[137,146],"19.7%":[140],"PER":[142],"DNN":[145]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
