{"id":"https://openalex.org/W4224916413","doi":"https://doi.org/10.1109/icassp43922.2022.9747295","title":"LETR: A Lightweight and Efficient Transformer for Keyword Spotting","display_name":"LETR: A Lightweight and Efficient Transformer for Keyword Spotting","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4224916413","doi":"https://doi.org/10.1109/icassp43922.2022.9747295"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9747295","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747295","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110842343","display_name":"Kevin Ding","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kevin Ding","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091168107","display_name":"Martin Zong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Martin Zong","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060389661","display_name":"Jiakui Li","orcid":"https://orcid.org/0000-0002-6065-6648"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiakui Li","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003607066","display_name":"Baoxiang Li","orcid":"https://orcid.org/0009-0009-4490-2157"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baoxiang Li","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.1801,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.89488471,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"7987","last_page":"7991"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.813305139541626},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8120278120040894},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.7226780652999878},{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.6902600526809692},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6346485614776611},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5737469792366028},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5008857250213623},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.482993483543396},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4332907795906067},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4017412066459656},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3417567014694214},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.2310180366039276},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.12000203132629395},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09608858823776245},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.08848440647125244}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.813305139541626},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8120278120040894},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.7226780652999878},{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.6902600526809692},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6346485614776611},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5737469792366028},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5008857250213623},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.482993483543396},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4332907795906067},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4017412066459656},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3417567014694214},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.2310180366039276},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.12000203132629395},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09608858823776245},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.08848440647125244}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9747295","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9747295","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1999121090","https://openalex.org/W2034940213","https://openalex.org/W2148154194","https://openalex.org/W2194775991","https://openalex.org/W2565639579","https://openalex.org/W2748659049","https://openalex.org/W2797583228","https://openalex.org/W2888641632","https://openalex.org/W2899663614","https://openalex.org/W2936774411","https://openalex.org/W2963925437","https://openalex.org/W2963977978","https://openalex.org/W2973226577","https://openalex.org/W3015399080","https://openalex.org/W3025581723","https://openalex.org/W3094502228","https://openalex.org/W3095321517","https://openalex.org/W3097018422","https://openalex.org/W3106728613","https://openalex.org/W3121523901","https://openalex.org/W3136416617","https://openalex.org/W3139049060","https://openalex.org/W3163237592","https://openalex.org/W3170874841","https://openalex.org/W3198035615","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6750665317","https://openalex.org/W6755977528","https://openalex.org/W6784333009","https://openalex.org/W6788135285","https://openalex.org/W6792695861"],"related_works":["https://openalex.org/W2918559346","https://openalex.org/W3119978414","https://openalex.org/W2114097550","https://openalex.org/W3206647229","https://openalex.org/W4286904253","https://openalex.org/W2000885660","https://openalex.org/W1969408022","https://openalex.org/W2117995638","https://openalex.org/W1989658893","https://openalex.org/W2545741539"],"abstract_inverted_index":{"Transformer":[0,44,102],"recently":[1],"has":[2,134],"achieved":[3,135],"impressive":[4],"success":[5],"in":[6,56,73],"a":[7,41,57,97,118],"number":[8],"of":[9,20,43,69,90,121],"domains,":[10],"including":[11,78],"machine":[12],"translation,":[13],"image":[14],"recognition,":[15],"and":[16,54,65,85,99],"speech":[17],"recognition.":[18],"Most":[19],"the":[21,50,63,67,91,95,144],"previous":[22],"work":[23],"on":[24,110,127],"Keyword":[25],"Spotting":[26],"(KWS)":[27],"is":[28],"built":[29],"upon":[30],"convolutional":[31],"or":[32],"recurrent":[33],"neural":[34],"networks.":[35],"In":[36],"this":[37],"paper,":[38],"we":[39,93],"explore":[40],"family":[42],"architectures":[45],"for":[46,103],"keyword":[47],"spotting,":[48],"optimizing":[49],"trade-off":[51],"between":[52],"accuracy":[53],"efficiency":[55,108],"high-speed":[58],"regime.":[59],"We":[60,105],"also":[61],"studied":[62],"effectiveness":[64],"summarized":[66],"principles":[68],"applying":[70],"key":[71],"components":[72],"vision":[74],"Transformers":[75],"to":[76,116,143],"KWS,":[77],"patch":[79],"embedding,":[80],"position":[81],"encoding,":[82],"attention":[83],"mechanism,":[84],"class":[86],"token.":[87],"On":[88],"top":[89],"findings,":[92],"propose":[94],"LeTR:":[96],"lightweight":[98],"highly":[100],"efficient":[101],"KWS.":[104],"consider":[106],"different":[107,111],"measures":[109],"edge":[112],"devices":[113],"so":[114],"as":[115],"reflect":[117],"wide":[119],"range":[120],"application":[122],"scenarios":[123],"best.":[124],"Experimental":[125],"results":[126,137],"two":[128],"common":[129],"benchmarks":[130],"demonstrate":[131],"that":[132],"LeTR":[133],"state-of-the-art":[136],"over":[138],"competing":[139],"methods":[140],"with":[141],"respect":[142],"speed/accuracy":[145],"trade-off.":[146]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":9}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
