{"id":"https://openalex.org/W2400676773","doi":"https://doi.org/10.1109/icassp.2016.7472698","title":"Convolutional neural network pre-trained with projection matrices on linear discriminant analysis","display_name":"Convolutional neural network pre-trained with projection matrices on linear discriminant analysis","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2400676773","doi":"https://doi.org/10.1109/icassp.2016.7472698","mag":"2400676773"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2016.7472698","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472698","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037089480","display_name":"Takashi Fukuda","orcid":"https://orcid.org/0000-0001-9599-6274"},"institutions":[{"id":"https://openalex.org/I4210145865","display_name":"IBM Research - Tokyo","ror":"https://ror.org/04915qk43","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145865"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takashi Fukuda","raw_affiliation_strings":["IBM Watson Multimodal, IBM Japan Ltd., Chuo-ku, Tokyo, JAPAN"],"affiliations":[{"raw_affiliation_string":"IBM Watson Multimodal, IBM Japan Ltd., Chuo-ku, Tokyo, JAPAN","institution_ids":["https://openalex.org/I4210145865"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033500018","display_name":"Osamu Ichikawa","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145865","display_name":"IBM Research - Tokyo","ror":"https://ror.org/04915qk43","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145865"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Osamu Ichikawa","raw_affiliation_strings":["IBM Watson Multimodal, IBM Japan Ltd., Chuo-ku, Tokyo, JAPAN"],"affiliations":[{"raw_affiliation_string":"IBM Watson Multimodal, IBM Japan Ltd., Chuo-ku, Tokyo, JAPAN","institution_ids":["https://openalex.org/I4210145865"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013412274","display_name":"Ryuki Tachibana","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145865","display_name":"IBM Research - Tokyo","ror":"https://ror.org/04915qk43","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145865"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryuki Tachibana","raw_affiliation_strings":["IBM Watson Multimodal, IBM Japan Ltd., Chuo-ku, Tokyo, JAPAN"],"affiliations":[{"raw_affiliation_string":"IBM Watson Multimodal, IBM Japan Ltd., Chuo-ku, Tokyo, JAPAN","institution_ids":["https://openalex.org/I4210145865"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5037089480"],"corresponding_institution_ids":["https://openalex.org/I4210145865"],"apc_list":null,"apc_paid":null,"fwci":0.7565,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.70406097,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"5345","last_page":"5349"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7481287717819214},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.7399965524673462},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6968239545822144},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6643598079681396},{"id":"https://openalex.org/keywords/linear-discriminant-analysis","display_name":"Linear discriminant analysis","score":0.608971118927002},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5628294348716736},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.4931301772594452},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.4456157684326172},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.43152642250061035},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.42465826869010925},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.4242638349533081},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4124159812927246},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.36213982105255127},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3373830318450928},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.30135321617126465}],"concepts":[{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7481287717819214},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.7399965524673462},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6968239545822144},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6643598079681396},{"id":"https://openalex.org/C69738355","wikidata":"https://www.wikidata.org/wiki/Q1228929","display_name":"Linear discriminant analysis","level":2,"score":0.608971118927002},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5628294348716736},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.4931301772594452},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.4456157684326172},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.43152642250061035},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42465826869010925},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.4242638349533081},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4124159812927246},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.36213982105255127},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3373830318450928},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.30135321617126465},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2016.7472698","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2016.7472698","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.6800000071525574,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W255264","https://openalex.org/W86999225","https://openalex.org/W1627087495","https://openalex.org/W1940991344","https://openalex.org/W1971332722","https://openalex.org/W1990934990","https://openalex.org/W1999686891","https://openalex.org/W2075012882","https://openalex.org/W2101596234","https://openalex.org/W2104448323","https://openalex.org/W2112739286","https://openalex.org/W2112796928","https://openalex.org/W2134557905","https://openalex.org/W2140686411","https://openalex.org/W2141102245","https://openalex.org/W2144354855","https://openalex.org/W2152051032","https://openalex.org/W2155273149","https://openalex.org/W2158010000","https://openalex.org/W2158420521","https://openalex.org/W2962719052"],"related_works":["https://openalex.org/W2364370872","https://openalex.org/W2053269318","https://openalex.org/W2025614924","https://openalex.org/W4324119469","https://openalex.org/W2164868312","https://openalex.org/W2160650576","https://openalex.org/W2075383893","https://openalex.org/W2353313924","https://openalex.org/W1992295166","https://openalex.org/W2143508933"],"abstract_inverted_index":{"Recently,":[0],"the":[1,25,47,63,104,113,125,153,162,167,173],"hybrid":[2],"architecture":[3],"of":[4,139,161,175,183],"a":[5,10,37,73,101,120,189],"neural":[6,34],"network":[7,35],"(NN)":[8],"and":[9,115,127],"hidden":[11],"Markov":[12],"model":[13,29],"(HMM)":[14],"has":[15,177],"shown":[16],"significant":[17],"improvement":[18,192],"on":[19],"automatic":[20],"speech":[21],"recognition":[22],"(ASR)":[23],"over":[24],"conventional":[26],"Gaussian":[27],"mixture":[28],"(GMM)-based":[30],"system.":[31],"The":[32,185],"convolutional":[33,155],"(CNN),":[36],"successful":[38],"NN-based":[39],"system,":[40],"can":[41,86,171],"represent":[42],"local":[43,84,105,163,197],"spectral":[44,70,98],"variations":[45],"spanning":[46],"time-frequency":[48],"space.":[49],"Meanwhile,":[50],"spectro-temporal":[51,64],"features":[52,65],"have":[53,82],"been":[54],"widely":[55],"studied":[56],"to":[57,95,194],"make":[58],"ASR":[59],"more":[60],"robust.":[61],"Typically,":[62],"are":[66,109,116],"extracted":[67],"from":[68],"acoustic":[69],"patterns":[71],"using":[72,135],"2D":[74,97],"filtering":[75],"process.":[76],"Convolutional":[77],"layers":[78],"in":[79,107,124,157],"CNN":[80,108,195],"that":[81],"various":[83],"windows":[85,106,164],"also":[87],"be":[88],"regarded":[89],"as":[90,149,180],"an":[91],"efficient":[92],"feature":[93],"extractor":[94],"capture":[96],"variations.":[99],"In":[100,130],"standard":[102],"procedure,":[103],"initialized":[110,199],"randomly":[111],"before":[112],"pre-training":[114,126],"iteratively":[117],"updated":[118],"with":[119,196],"back":[121],"propagation":[122],"algorithm":[123],"fine-tuning":[128],"steps.":[129],"this":[131],"paper,":[132],"we":[133,170],"explore":[134],"projection":[136],"matrices":[137],"composed":[138],"eigenvectors":[140,174],"estimated":[141],"by":[142,166],"linear":[143],"discriminant":[144],"analysis":[145,160],"(LDA)":[146],"objective":[147],"function":[148],"initial":[150,181],"weights":[151,182,198],"for":[152],"first":[154],"layer":[156],"CNN.":[158,184],"From":[159],"trained":[165],"proposed":[168,186],"method,":[169],"see":[172],"LDA":[176],"desirable":[178],"properties":[179],"method":[187],"yielded":[188],"8.1%":[190],"relative":[191],"compared":[193],"randomly.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
