{"id":"https://openalex.org/W7154265324","doi":"https://doi.org/10.48550/arxiv.2604.11321","title":"Winner-Take-All Spiking Transformer for Language Modeling","display_name":"Winner-Take-All Spiking Transformer for Language Modeling","publication_year":2026,"publication_date":"2026-04-13","ids":{"openalex":"https://openalex.org/W7154265324","doi":"https://doi.org/10.48550/arxiv.2604.11321"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.11321","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11321","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.11321","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133597794","display_name":"Chenlin Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Chenlin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133582658","display_name":"Sihang Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Sihang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133595833","display_name":"Jiaqi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiaqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003880966","display_name":"Dongyang Ma","orcid":"https://orcid.org/0000-0002-5545-5876"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Dongyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059556545","display_name":"Kaiwei Che","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Che, Kaiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124325987","display_name":"Baiyu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Baiyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133590443","display_name":"Qingyan Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Qingyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133560204","display_name":"Zhengyu Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Zhengyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133552621","display_name":"Yonghong Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Yonghong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.6887000203132629,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.6887000203132629,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.06939999759197235,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.04149999842047691,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6662999987602234},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6452000141143799},{"id":"https://openalex.org/keywords/spiking-neural-network","display_name":"Spiking neural network","score":0.6403999924659729},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5831999778747559},{"id":"https://openalex.org/keywords/neuromorphic-engineering","display_name":"Neuromorphic engineering","score":0.5547000169754028},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.48500001430511475}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.751800000667572},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6662999987602234},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6452000141143799},{"id":"https://openalex.org/C11731999","wikidata":"https://www.wikidata.org/wiki/Q9067355","display_name":"Spiking neural network","level":3,"score":0.6403999924659729},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5831999778747559},{"id":"https://openalex.org/C151927369","wikidata":"https://www.wikidata.org/wiki/Q1981312","display_name":"Neuromorphic engineering","level":3,"score":0.5547000169754028},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5382999777793884},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.48500001430511475},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3619000017642975},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.3244999945163727},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32190001010894775},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3131999969482422},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.26460000872612},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.2612000107765198}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.11321","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11321","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.11321","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11321","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.4781704843044281,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Spiking":[0,14,87,93,103,113],"Transformers,":[1],"which":[2,55],"combine":[3],"the":[4,9,149,156],"scalability":[5],"of":[6,13,151,158],"Transformers":[7],"with":[8,45],"sparse,":[10],"energy-efficient":[11,166],"property":[12],"Neural":[15],"Networks":[16],"(SNNs),":[17],"have":[18],"achieved":[19],"impressive":[20],"results":[21],"in":[22],"neuromorphic":[23,64],"and":[24,27,60,78,90,110,144,154,165],"vision":[25,40],"tasks":[26,147],"attracted":[28],"increasing":[29],"attention.":[30],"However,":[31],"existing":[32],"directly":[33],"trained":[34,126],"spiking":[35,46,53,76,159],"transformers":[36,77,160],"primarily":[37],"focus":[38],"on":[39,51,97,135],"tasks.":[41,132],"For":[42],"language":[43,108,118,130,140,163],"modeling":[44,109,164],"transformer,":[47],"convergence":[48],"relies":[49],"heavily":[50],"softmax-based":[52],"self-attention,":[54],"incurs":[56],"high":[57],"energy":[58],"costs":[59],"poses":[61],"challenges":[62],"for":[63,106,116,128,161],"deployment.":[65],"To":[66],"address":[67],"this":[68],"issue,":[69],"we":[70,99],"introduce":[71],"Winner-Take-All":[72],"(WTA)":[73],"mechanisms":[74],"into":[75],"propose":[79],"two":[80],"novel":[81],"softmax-free,":[82,122],"spike-driven":[83],"self-attention":[84],"modules:":[85],"WTA":[86,92],"Self-Attention":[88,94],"(WSSA)":[89],"Causal":[91],"(CWSSA).":[95],"Based":[96],"them,":[98],"design":[100],"WTA-based":[101,111],"Encoder-only":[102],"Transformer":[104,114,124],"(WE-Spikingformer)":[105],"masked":[107],"Decoder-only":[112],"(WD-Spikingformer)":[115],"causal":[117],"modeling,":[119],"systematically":[120],"exploring":[121],"spiking-driven":[123],"architectures":[125],"end-to-end":[127],"natural":[129,139],"processing":[131],"Extensive":[133],"experiments":[134],"16":[136],"datasets":[137],"spanning":[138],"understanding,":[141],"question-answering":[142],"tasks,":[143],"commonsense":[145],"reasoning":[146],"validate":[148],"effectiveness":[150],"our":[152],"approach":[153],"highlight":[155],"promise":[157],"general":[162],"artificial":[167],"intelligence.":[168]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-15T00:00:00"}
