{"id":"https://openalex.org/W2125496931","doi":"https://doi.org/10.1186/s13636-015-0068-3","title":"Phone recognition with hierarchical convolutional deep maxout networks","display_name":"Phone recognition with hierarchical convolutional deep maxout networks","publication_year":2015,"publication_date":"2015-09-04","ids":{"openalex":"https://openalex.org/W2125496931","doi":"https://doi.org/10.1186/s13636-015-0068-3","mag":"2125496931"},"language":"en","primary_location":{"id":"doi:10.1186/s13636-015-0068-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13636-015-0068-3","pdf_url":"https://asmp-eurasipjournals.springeropen.com/track/pdf/10.1186/s13636-015-0068-3","source":{"id":"https://openalex.org/S19605986","display_name":"EURASIP Journal on Audio Speech and Music Processing","issn_l":"1687-4714","issn":["1687-4714","1687-4722"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://asmp-eurasipjournals.springeropen.com/track/pdf/10.1186/s13636-015-0068-3","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020608163","display_name":"L\u00e1szl\u00f3 T\u00f3th","orcid":"https://orcid.org/0000-0003-0161-1375"},"institutions":[{"id":"https://openalex.org/I4210152167","display_name":"MTA-SZTE Research Group on Artificial Intelligence","ror":"https://ror.org/0507fk326","country_code":"HU","type":"facility","lineage":["https://openalex.org/I227486990","https://openalex.org/I4210152167","https://openalex.org/I7597260"]}],"countries":["HU"],"is_corresponding":true,"raw_author_name":"L\u00e1szl\u00f3 T\u00f3th","raw_affiliation_strings":["MTA-SZTE Research Group on Artificial Intelligence, Tisza Lajos krt. 103., Szeged, H-6720, Hungary","MTA-SZTE Research Group on Artificial Intelligence, Szeged, Hungary"],"affiliations":[{"raw_affiliation_string":"MTA-SZTE Research Group on Artificial Intelligence, Tisza Lajos krt. 103., Szeged, H-6720, Hungary","institution_ids":["https://openalex.org/I4210152167"]},{"raw_affiliation_string":"MTA-SZTE Research Group on Artificial Intelligence, Szeged, Hungary","institution_ids":["https://openalex.org/I4210152167"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5020608163"],"corresponding_institution_ids":["https://openalex.org/I4210152167"],"apc_list":{"value":1115,"currency":"GBP","value_usd":1367},"apc_paid":{"value":1115,"currency":"GBP","value_usd":1367},"fwci":17.4182,"has_fulltext":true,"cited_by_count":86,"citation_normalized_percentile":{"value":0.9916262,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"2015","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8138941526412964},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7145124673843384},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6979168057441711},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6099467277526855},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.5761861801147461},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5646684765815735},{"id":"https://openalex.org/keywords/timit","display_name":"TIMIT","score":0.5487943887710571},{"id":"https://openalex.org/keywords/activation-function","display_name":"Activation function","score":0.46103787422180176},{"id":"https://openalex.org/keywords/sigmoid-function","display_name":"Sigmoid function","score":0.4514918327331543},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4478597640991211},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3980177044868469},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3383091688156128},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3004133701324463},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.19618475437164307}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8138941526412964},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7145124673843384},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6979168057441711},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6099467277526855},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.5761861801147461},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5646684765815735},{"id":"https://openalex.org/C2778724510","wikidata":"https://www.wikidata.org/wiki/Q7670405","display_name":"TIMIT","level":3,"score":0.5487943887710571},{"id":"https://openalex.org/C38365724","wikidata":"https://www.wikidata.org/wiki/Q4677469","display_name":"Activation function","level":3,"score":0.46103787422180176},{"id":"https://openalex.org/C81388566","wikidata":"https://www.wikidata.org/wiki/Q526668","display_name":"Sigmoid function","level":3,"score":0.4514918327331543},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4478597640991211},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3980177044868469},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3383091688156128},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3004133701324463},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.19618475437164307},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1186/s13636-015-0068-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13636-015-0068-3","pdf_url":"https://asmp-eurasipjournals.springeropen.com/track/pdf/10.1186/s13636-015-0068-3","source":{"id":"https://openalex.org/S19605986","display_name":"EURASIP Journal on Audio Speech and Music Processing","issn_l":"1687-4714","issn":["1687-4714","1687-4722"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing","raw_type":"journal-article"},{"id":"pmh:oai:publicatio.bibl.u-szeged.hu:5976","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400436","display_name":"SZTE Publicatio Repozit\u00f3rium (University of Szeged)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I227486990","host_organization_name":"University of Szeged","host_organization_lineage":["https://openalex.org/I227486990"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Foly\u00f3iratcikk"}],"best_oa_location":{"id":"doi:10.1186/s13636-015-0068-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13636-015-0068-3","pdf_url":"https://asmp-eurasipjournals.springeropen.com/track/pdf/10.1186/s13636-015-0068-3","source":{"id":"https://openalex.org/S19605986","display_name":"EURASIP Journal on Audio Speech and Music Processing","issn_l":"1687-4714","issn":["1687-4714","1687-4722"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"EURASIP Journal on Audio, Speech, and Music Processing","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5099999904632568,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2125496931.pdf","grobid_xml":"https://content.openalex.org/works/W2125496931.grobid-xml"},"referenced_works_count":57,"referenced_works":["https://openalex.org/W118756463","https://openalex.org/W587565084","https://openalex.org/W811578723","https://openalex.org/W1533861849","https://openalex.org/W1553004968","https://openalex.org/W1555696814","https://openalex.org/W1904365287","https://openalex.org/W1907282891","https://openalex.org/W1968419113","https://openalex.org/W1976894406","https://openalex.org/W1981617403","https://openalex.org/W1987841215","https://openalex.org/W1993882792","https://openalex.org/W2004227461","https://openalex.org/W2026369565","https://openalex.org/W2028706510","https://openalex.org/W2033310064","https://openalex.org/W2035424729","https://openalex.org/W2036242736","https://openalex.org/W2054139811","https://openalex.org/W2061305730","https://openalex.org/W2062227835","https://openalex.org/W2072349636","https://openalex.org/W2075925017","https://openalex.org/W2077804127","https://openalex.org/W2089917322","https://openalex.org/W2094384715","https://openalex.org/W2095635231","https://openalex.org/W2103126253","https://openalex.org/W2105153012","https://openalex.org/W2112739286","https://openalex.org/W2114016253","https://openalex.org/W2131342762","https://openalex.org/W2136442326","https://openalex.org/W2137919539","https://openalex.org/W2143612262","https://openalex.org/W2147768505","https://openalex.org/W2154833897","https://openalex.org/W2155273149","https://openalex.org/W2156387975","https://openalex.org/W2160306971","https://openalex.org/W2165712214","https://openalex.org/W2167458787","https://openalex.org/W2169840288","https://openalex.org/W2184170932","https://openalex.org/W2188183693","https://openalex.org/W2296748324","https://openalex.org/W2403731734","https://openalex.org/W2404206164","https://openalex.org/W2490163484","https://openalex.org/W2962719052","https://openalex.org/W6600020652","https://openalex.org/W6600195515","https://openalex.org/W6601977772","https://openalex.org/W6605340403","https://openalex.org/W6682889407","https://openalex.org/W6743198006"],"related_works":["https://openalex.org/W3024979424","https://openalex.org/W4283785902","https://openalex.org/W2895192346","https://openalex.org/W3032499992","https://openalex.org/W2041004593","https://openalex.org/W3134817226","https://openalex.org/W3110577345","https://openalex.org/W3136579697","https://openalex.org/W2811324119","https://openalex.org/W1504105233"],"abstract_inverted_index":{"Deep":[0],"convolutional":[1,28,133],"neural":[2,14],"networks":[3,15,29,166,183],"(CNNs)":[4],"have":[5,60],"recently":[6],"been":[7,62,94],"shown":[8,95],"to":[9,57,96,131,139,154,264],"outperform":[10,97],"fully":[11,48,103],"connected":[12,49,104],"deep":[13,50,187],"(DNNs)":[16],"both":[17],"on":[18,21,167,215,224,288,301,309],"low-resource":[19,311],"and":[20,115,122,193,315],"large-scale":[22],"speech":[23],"tasks.":[24],"Experiments":[25],"indicate":[26],"that":[27,59,109,200,238,318],"can":[30,127,207],"attain":[31],"a":[32,144,194,239,246,269,310],"10\u201315":[33],"%":[34,255],"relative":[35,247,271],"improvement":[36],"in":[37,102,268],"the":[38,68,86,98,110,116,124,141,155,161,181,202,209,213,222,225,260,282,296,320],"word":[39],"error":[40,210,249,273,284],"rate":[41,211,250,274,285],"of":[42,113,149,157,163,212,219,230,252,276],"large":[43,312],"vocabulary":[44,313],"recognition":[45,234],"tasks":[46],"over":[47,256],"networks.":[51,135],"Here,":[52],"we":[53,137,206,236,286,303,316],"explore":[54],"some":[55],"refinements":[56],"CNNs":[58,114],"not":[61],"pursued":[63],"by":[64,179],"other":[65,169],"authors.":[66],"First,":[67],"CNN":[69,142,240,266],"papers":[70],"published":[71],"up":[72],"till":[73],"now":[74],"used":[75],"sigmoid":[76],"or":[77],"rectified":[78],"linear":[79],"(ReLU)":[80],"neurons.":[81],"We":[82,106,174,197],"will":[83,107,175,198],"experiment":[84],"with":[85,189,201],"maxout":[87,117,134,243],"activation":[88,100],"function":[89,101,118],"proposed":[90,321],"recently,":[91],"which":[92,293],"has":[93],"rectifier":[99],"DNNs.":[105],"show":[108,199],"pooling":[111],"operation":[112],"are":[119],"closely":[120],"related,":[121],"so":[123],"two":[125,165,182],"technologies":[126],"be":[128],"readily":[129],"combined":[130],"build":[132],"Second,":[136],"propose":[138],"turn":[140],"into":[143,184],"hierarchical":[145,203,261],"model.":[146],"The":[147],"origins":[148],"this":[150,177,265,329],"approach":[151],"go":[152],"back":[153],"era":[156],"shallow":[158],"nets,":[159],"where":[160],"idea":[162],"stacking":[164],"each":[168],"was":[170],"relatively":[171],"well":[172],"known.":[173],"extend":[176],"method":[178],"fusing":[180],"one":[185],"joint":[186],"model":[188],"many":[190],"hidden":[191],"layers":[192],"special":[195],"structure.":[196],"modelling":[204,262,322],"approach,":[205],"reduce":[208],"network":[214],"an":[216],"expanded":[217],"context":[218],"input.":[220],"In":[221],"experiments":[223],"Texas":[226],"Instruments":[227],"Massachusetts":[228],"Institute":[229],"Technology":[231],"(TIMIT)":[232],"phone":[233,248,272],"task,":[235,314],"find":[237,317],"built":[241],"from":[242],"units":[244],"yields":[245],"reduction":[251,275],"about":[253],"4.3":[254],"ReLU":[257],"CNNs.":[258],"Applying":[259],"scheme":[263],"results":[267,327],"further":[270],"5.5":[277],"%.":[278],"Using":[279],"dropout":[280],"training,":[281],"lowest":[283],"get":[287],"TIMIT":[289],"is":[290,294],"16.5":[291],"%,":[292],"currently":[295],"best":[297,307],"result.":[298],"Besides":[299],"experimenting":[300],"TIMIT,":[302],"also":[304],"evaluate":[305],"our":[306],"models":[308],"all":[319],"improvements":[323],"give":[324],"consistently":[325],"better":[326],"for":[328],"larger":[330],"database":[331],"as":[332],"well.":[333]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":18},{"year":2018,"cited_by_count":20},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":10},{"year":2015,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
