{"id":"https://openalex.org/W3162465594","doi":"https://doi.org/10.1109/icassp39728.2021.9414954","title":"Learned Transferable Architectures Can Surpass Hand-Designed Architectures for Large Scale Speech Recognition","display_name":"Learned Transferable Architectures Can Surpass Hand-Designed Architectures for Large Scale Speech Recognition","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3162465594","doi":"https://doi.org/10.1109/icassp39728.2021.9414954","mag":"3162465594"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9414954","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414954","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074469791","display_name":"Liqiang He","orcid":"https://orcid.org/0000-0001-9179-6398"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liqiang He","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075183307","display_name":"Dan Su","orcid":"https://orcid.org/0000-0001-5746-9545"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dan Su","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Yu","raw_affiliation_strings":["Tencent AI Lab, Bellevue, WA, USA"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Bellevue, WA, USA","institution_ids":["https://openalex.org/I4210108985"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5074469791"],"corresponding_institution_ids":["https://openalex.org/I2250653659"],"apc_list":null,"apc_paid":null,"fwci":0.5439,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.71767525,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"6788","last_page":"6792"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8282883167266846},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.7774938941001892},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.48658454418182373},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4683358669281006},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.44228479266166687},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.43238940834999084},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40385910868644714},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3346499800682068},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3220502436161041},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.0771251916885376}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8282883167266846},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.7774938941001892},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.48658454418182373},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4683358669281006},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.44228479266166687},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.43238940834999084},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40385910868644714},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3346499800682068},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3220502436161041},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0771251916885376},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9414954","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9414954","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.4399999976158142,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1600744878","https://openalex.org/W2005708641","https://openalex.org/W2112739286","https://openalex.org/W2160815625","https://openalex.org/W2164579587","https://openalex.org/W2402146185","https://openalex.org/W2405883473","https://openalex.org/W2407080277","https://openalex.org/W2515439472","https://openalex.org/W2553303224","https://openalex.org/W2794209590","https://openalex.org/W2810075754","https://openalex.org/W2889048668","https://openalex.org/W2892009249","https://openalex.org/W2951104886","https://openalex.org/W2963242190","https://openalex.org/W2963308316","https://openalex.org/W2964081807","https://openalex.org/W2965658867","https://openalex.org/W2972451902","https://openalex.org/W2981748264","https://openalex.org/W3015492205","https://openalex.org/W3104896896","https://openalex.org/W6712930963","https://openalex.org/W6713762819","https://openalex.org/W6729956949","https://openalex.org/W6752515464","https://openalex.org/W6754473786"],"related_works":["https://openalex.org/W2588198209","https://openalex.org/W1909006023","https://openalex.org/W4205824991","https://openalex.org/W3200723557","https://openalex.org/W2952348651","https://openalex.org/W4312713546","https://openalex.org/W2362195430","https://openalex.org/W2347494122","https://openalex.org/W2567983276","https://openalex.org/W2038503502"],"abstract_inverted_index":{"In":[0],"this":[1,121],"paper,":[2],"we":[3,41],"explore":[4,54],"the":[5,18,22,29,33,37,50,55,65,69,76,88,98,105,116,123,139],"neural":[6],"architecture":[7,19,66,90],"search":[8,20,45,51,71],"(NAS)":[9],"for":[10],"automatic":[11],"speech":[12],"recognition":[13],"(ASR)":[14],"systems.":[15,147],"We":[16],"conduct":[17],"on":[21,36,97,104],"small":[23],"proxy":[24],"dataset,":[25,107],"and":[26,79],"then":[27],"evaluate":[28],"network,":[30],"constructed":[31],"from":[32],"searched":[34,89],"architecture,":[35],"large":[38,106,131],"dataset.":[39],"Specially,":[40],"propose":[42],"a":[43,130],"revised":[44,70],"space":[46,72],"that":[47],"theoretically":[48],"facilitates":[49],"algorithm":[52],"to":[53,135,144],"architectures":[56],"with":[57,83,109,129],"low":[58],"complexity.":[59],"Extensive":[60],"experiments":[61],"show":[62],"that:":[63],"(i)":[64],"learned":[67],"in":[68],"can":[73,91],"greatly":[74],"reduce":[75],"computational":[77],"overhead":[78],"GPU":[80],"memory":[81],"usage":[82],"mild":[84],"performance":[85],"degradation.":[86],"(ii)":[87],"achieve":[92],"more":[93],"than":[94],"15%":[95],"(average":[96],"four":[99],"test":[100],"sets)":[101],"relative":[102],"improvements":[103],"compared":[108],"our":[110,119],"best":[111,117],"hand-designed":[112],"DFSMN-SAN":[113],"architecture.":[114],"To":[115],"of":[118,126,142],"knowledge,":[120],"is":[122],"first":[124],"report":[125],"NAS":[127,143],"results":[128],"scale":[132],"dataset":[133],"(up":[134],"10K":[136],"hours),":[137],"indicating":[138],"promising":[140],"application":[141],"industrial":[145],"ASR":[146]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
