{"id":"https://openalex.org/W7137823439","doi":"https://doi.org/10.1609/aaai.v40i3.37194","title":"SpikCommander: A High-performance Spiking Transformer with Multi-view Learning for Efficient Speech Command Recognition","display_name":"SpikCommander: A High-performance Spiking Transformer with Multi-view Learning for Efficient Speech Command Recognition","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137823439","doi":"https://doi.org/10.1609/aaai.v40i3.37194"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i3.37194","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i3.37194","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37194/41156","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37194/41156","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129704764","display_name":"Jiaqi Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaqi Wang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen\nPengcheng Laboratory"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen\nPengcheng Laboratory","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101323908","display_name":"Liutao Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liutao Yu","raw_affiliation_strings":["Pengcheng Laboratory"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Pengcheng Laboratory","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017826936","display_name":"Xiongri Shen","orcid":"https://orcid.org/0000-0002-5655-6716"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiongri Shen","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112839610","display_name":"Sihang Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sihang Guo","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058384477","display_name":"Chenlin Zhou","orcid":"https://orcid.org/0000-0001-9338-2308"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenlin Zhou","raw_affiliation_strings":["Peking University\nPengcheng Laboratory"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Peking University\nPengcheng Laboratory","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083096343","display_name":"Leilei Zhao","orcid":"https://orcid.org/0000-0003-2309-8832"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Leilei Zhao","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129703032","display_name":"Yi Zhong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210102541","display_name":"Shenzhen Bay Laboratory","ror":"https://ror.org/00sdcjz77","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210102541"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Zhong","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen\nGreat Bay University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen\nGreat Bay University","institution_ids":["https://openalex.org/I4210102541"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129656226","display_name":"Zhiguo Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiguo Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129652851","display_name":"Zhengyu Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengyu Ma","raw_affiliation_strings":["Pengcheng Laboratory"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Pengcheng Laboratory","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0625,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"3","first_page":"2119","last_page":"2127"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.4796000123023987,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.4796000123023987,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.29030001163482666,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.08950000256299973,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6116999983787537},{"id":"https://openalex.org/keywords/spiking-neural-network","display_name":"Spiking neural network","score":0.520799994468689},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4050000011920929},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.40400001406669617},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.39239999651908875},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.38019999861717224},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.37689998745918274},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.3756999969482422}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8201000094413757},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6116999983787537},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5866000056266785},{"id":"https://openalex.org/C11731999","wikidata":"https://www.wikidata.org/wiki/Q9067355","display_name":"Spiking neural network","level":3,"score":0.520799994468689},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49470001459121704},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4050000011920929},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.40400001406669617},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.39239999651908875},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.38019999861717224},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.37689998745918274},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.3756999969482422},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.3732999861240387},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.36000001430511475},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33559998869895935},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.32839998602867126},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3255999982357025},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.3133000135421753},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.30390000343322754},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.2800000011920929},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.25760000944137573},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.2565000057220459}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i3.37194","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i3.37194","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37194/41156","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i3.37194","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i3.37194","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37194/41156","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137823439.pdf","grobid_xml":"https://content.openalex.org/works/W7137823439.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Spiking":[0,121,126],"neural":[1],"networks":[2],"(SNNs)":[3],"offer":[4],"a":[5,67,86,95],"promising":[6],"path":[7],"toward":[8],"energy-efficient":[9],"speech":[10,36,77,162],"command":[11,163],"recognition":[12],"(SCR)":[13],"by":[14],"leveraging":[15],"their":[16],"event-driven":[17],"processing":[18],"paradigm.":[19],"However,":[20],"existing":[21],"SNN-based":[22],"SCR":[23],"methods":[24],"often":[25],"struggle":[26],"to":[27,38,71,102],"capture":[28],"rich":[29],"temporal":[30,40,74,105],"dependencies":[31,75],"and":[32,42,108,130,158],"contextual":[33,97],"information":[34],"from":[35],"due":[37],"limited":[39],"modeling":[41,107],"binary":[43],"spike-based":[44],"representations.":[45],"To":[46],"address":[47],"these":[48],"challenges,":[49],"we":[50,82],"first":[51],"introduce":[52],"the":[53,120,125,131],"multi-view":[54,68],"spiking":[55,63,96],"temporal-aware":[56,64],"self-attention":[57],"(MSTASA)":[58],"module,":[59],"which":[60],"combines":[61],"effective":[62],"attention":[65],"with":[66,94,148],"learning":[69],"framework":[70],"model":[72],"complementary":[73],"in":[76],"commands.":[78],"Building":[79],"on":[80,116],"MSTASA,":[81],"further":[83],"propose":[84],"SpikCommander,":[85],"fully":[87],"spike-driven":[88],"transformer":[89],"architecture":[90],"that":[91,140],"integrates":[92],"MSTASA":[93],"refinement":[98],"channel":[99],"MLP":[100],"(SCR-MLP)":[101],"jointly":[103],"enhance":[104],"context":[106],"channel-wise":[109],"feature":[110],"integration.":[111],"We":[112],"evaluate":[113],"our":[114],"method":[115],"three":[117],"benchmark":[118],"datasets:":[119],"Heidelberg":[122],"Dataset":[123],"(SHD),":[124],"Speech":[127,133],"Commands":[128,134],"(SSC),":[129],"Google":[132],"V2":[135],"(GSC).":[136],"Extensive":[137],"experiments":[138],"demonstrate":[139],"SpikCommander":[141],"consistently":[142],"outperforms":[143],"state-of-the-art":[144],"(SOTA)":[145],"SNN":[146],"approaches":[147],"fewer":[149],"parameters":[150],"under":[151],"comparable":[152],"time":[153],"steps,":[154],"highlighting":[155],"its":[156],"effectiveness":[157],"efficiency":[159],"for":[160],"robust":[161],"recognition.":[164]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-18T00:00:00"}
