{"id":"https://openalex.org/W3046667470","doi":"https://doi.org/10.1109/taslp.2021.3071662","title":"TutorNet: Towards Flexible Knowledge Distillation for End-to-End Speech Recognition","display_name":"TutorNet: Towards Flexible Knowledge Distillation for End-to-End Speech Recognition","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3046667470","doi":"https://doi.org/10.1109/taslp.2021.3071662","mag":"3046667470"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2021.3071662","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3071662","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2008.00671","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ji Won Yoon","orcid":"https://orcid.org/0000-0001-8631-4489"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Ji Won Yoon","raw_affiliation_strings":["Department of Electrical and Computer Engineering and the Institute of New Media and Communications, Seoul National University, Seoul, Korea"],"raw_orcid":"https://orcid.org/0000-0001-8631-4489","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering and the Institute of New Media and Communications, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hyeonseung Lee","orcid":"https://orcid.org/0000-0001-6997-205X"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyeonseung Lee","raw_affiliation_strings":["Department of Electrical and Computer Engineering and the Institute of New Media and Communications, Seoul National University, Seoul, Korea"],"raw_orcid":"https://orcid.org/0000-0001-6997-205X","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering and the Institute of New Media and Communications, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hyung Yong Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyung Yong Kim","raw_affiliation_strings":["Department of Electrical and Computer Engineering and the Institute of New Media and Communications, Seoul National University, Seoul, Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering and the Institute of New Media and Communications, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Won Ik Cho","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Won Ik Cho","raw_affiliation_strings":["Department of Electrical and Computer Engineering and the Institute of New Media and Communications, Seoul National University, Seoul, Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering and the Institute of New Media and Communications, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":null,"display_name":"Nam Soo Kim","orcid":"https://orcid.org/0000-0002-0568-4902"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Nam Soo Kim","raw_affiliation_strings":["Department of Electrical and Computer Engineering and the Institute of New Media and Communications, Seoul National University, Seoul, Korea"],"raw_orcid":"https://orcid.org/0000-0002-0568-4902","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering and the Institute of New Media and Communications, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.2389,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.89660929,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"29","issue":null,"first_page":"1626","last_page":"1638"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.904699981212616,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.904699981212616,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.009600000455975533,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.008700000122189522,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.7353000044822693},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7132999897003174},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.5895000100135803},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.5659000277519226},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5562000274658203},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.5375999808311462},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5177000164985657},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4300999939441681}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7813000082969666},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.7353000044822693},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7132999897003174},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5895000100135803},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5837000012397766},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.5659000277519226},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5562000274658203},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.5375999808311462},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5220000147819519},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5177000164985657},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4300999939441681},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3995000123977661},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.3684000074863434},{"id":"https://openalex.org/C84685590","wikidata":"https://www.wikidata.org/wiki/Q1540472","display_name":"Knowledge engineering","level":2,"score":0.3625999987125397},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.3481000065803528},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.31279999017715454},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3086000084877014},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.3073999881744385},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.26190000772476196}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2021.3071662","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2021.3071662","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2008.00671","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2008.00671","pdf_url":"https://arxiv.org/pdf/2008.00671","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2008.00671","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2008.00671","pdf_url":"https://arxiv.org/pdf/2008.00671","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1533416326","https://openalex.org/W2127141656","https://openalex.org/W2291513470","https://openalex.org/W2327501763","https://openalex.org/W2507699225","https://openalex.org/W2508418541","https://openalex.org/W2530876040","https://openalex.org/W2597757402","https://openalex.org/W2627092829","https://openalex.org/W2711861986","https://openalex.org/W2739879705","https://openalex.org/W2747909401","https://openalex.org/W2766219058","https://openalex.org/W2793383859","https://openalex.org/W2892008152","https://openalex.org/W2913178639","https://openalex.org/W2936774411","https://openalex.org/W2936993002","https://openalex.org/W2962780374","https://openalex.org/W2962784628","https://openalex.org/W2963738441","https://openalex.org/W2963864497","https://openalex.org/W2973040747","https://openalex.org/W2973215447","https://openalex.org/W2997454826","https://openalex.org/W6600284362","https://openalex.org/W6623517193","https://openalex.org/W6631190155","https://openalex.org/W6637373629","https://openalex.org/W6637551013","https://openalex.org/W6638523607","https://openalex.org/W6638749077","https://openalex.org/W6675365184","https://openalex.org/W6679855610","https://openalex.org/W6679909955","https://openalex.org/W6687566353","https://openalex.org/W6697339895","https://openalex.org/W6712847557","https://openalex.org/W6727336983","https://openalex.org/W6730179637","https://openalex.org/W6739901393","https://openalex.org/W6754473786","https://openalex.org/W6756104738","https://openalex.org/W6762625080","https://openalex.org/W6767671539","https://openalex.org/W6769971416"],"related_works":[],"abstract_inverted_index":{"In":[0],"recent":[1],"years,":[2],"there":[3],"has":[4,58],"been":[5,59],"a":[6,53,65,72,135,217],"great":[7],"deal":[8],"of":[9,86,98,100,111,124,151,201,219,244],"research":[10],"in":[11],"developing":[12],"end-to-end":[13,32,139],"speech":[14,140],"recognition":[15],"models,":[16],"which":[17,51,210],"enable":[18],"simplifying":[19],"the":[20,84,87,91,96,101,109,115,125,155,161,174,180,187,192,207,211,229,242,245],"traditional":[21],"pipeline":[22],"and":[23,67,74,177],"achieving":[24],"promising":[25],"results.":[26],"Despite":[27],"their":[28],"remarkable":[29],"performance":[30,243],"improvements,":[31],"models":[33],"typically":[34],"require":[35],"expensive":[36],"computational":[37,46],"cost":[38],"to":[39,61,71,122,209,240],"show":[40],"successful":[41],"performance.":[42],"To":[43,128],"reduce":[44],"this":[45,131],"burden,":[47],"knowledge":[48,63,170,182,230],"distillation":[49,171,183],"(KD),":[50],"is":[52,194,222],"popular":[54],"model":[55,69,76,112,117],"compression":[56],"method,":[57],"used":[60],"transfer":[62],"from":[64],"deep":[66],"complex":[68],"(teacher)":[70],"shallower":[73],"simpler":[75],"(student).":[77],"Previous":[78],"KD":[79,136,146],"approaches":[80],"have":[81],"commonly":[82],"designed":[83],"architecture":[85],"student":[88,116,193],"by":[89],"reducing":[90],"width":[92],"per":[93],"layer":[94],"or":[95],"number":[97,218],"layers":[99],"teacher.":[102,127],"This":[103],"structural":[104],"reduction":[105],"scheme":[106],"might":[107],"limit":[108],"flexibility":[110],"selection":[113],"since":[114],"structure":[118],"should":[119],"be":[120],"similar":[121],"that":[123,144,204,224],"given":[126],"cope":[129],"with":[130,186,196,233],"limitation,":[132],"we":[133,166,198],"propose":[134],"method":[137],"for":[138],"recognition,":[141],"namely":[142],"TutorNet,":[143],"applies":[145],"techniques":[147],"across":[148],"different":[149,234],"types":[150],"neural":[152],"networks":[153,232],"at":[154],"hidden":[156],"representation-level":[157,169],"as":[158,160],"well":[159],"output-level.":[162],"For":[163],"concrete":[164],"realizations,":[165],"firstly":[167],"apply":[168,179],"(RKD)":[172],"during":[173],"initialization":[175],"step,":[176],"then":[178],"softmax-level":[181],"(SKD)":[184],"combined":[185],"original":[188],"task":[189],"learning.":[190],"When":[191],"trained":[195],"RKD,":[197],"make":[199],"use":[200],"frame":[202],"weighting":[203],"points":[205],"out":[206],"frames":[208],"teacher":[212],"pays":[213],"more":[214],"attention.":[215],"Through":[216],"experiments,":[220],"it":[221],"verified":[223],"TutorNet":[225],"not":[226],"only":[227],"distills":[228],"between":[231],"topologies":[235],"but":[236],"also":[237],"significantly":[238],"contributes":[239],"improving":[241],"distilled":[246],"student.":[247]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2020-08-07T00:00:00"}
