{"id":"https://openalex.org/W7129075372","doi":"https://doi.org/10.48550/arxiv.2602.12675","title":"SLA2: Sparse-Linear Attention with Learnable Routing and QAT","display_name":"SLA2: Sparse-Linear Attention with Learnable Routing and QAT","publication_year":2026,"publication_date":"2026-02-13","ids":{"openalex":"https://openalex.org/W7129075372","doi":"https://doi.org/10.48550/arxiv.2602.12675"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.12675","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.12675","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.12675","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126143517","display_name":"Jintao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Jintao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126154195","display_name":"Haoxu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Haoxu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121822756","display_name":"Kai Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126111138","display_name":"Kaiwen Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Kaiwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126108886","display_name":"Youhe Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Youhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123878369","display_name":"Ion Stoica","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stoica, Ion","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Chen, Jianfei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jianfei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126168825","display_name":"Jun Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126083869","display_name":"Joseph E. Gonzalez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gonzalez, Joseph E.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5126143517"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.1266999989748001,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.1266999989748001,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.09870000183582306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.09009999781847,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5996999740600586},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5927000045776367},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5350000262260437},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.5254999995231628},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.46860000491142273},{"id":"https://openalex.org/keywords/sparse-approximation","display_name":"Sparse approximation","score":0.4368000030517578}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6883000135421753},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5996999740600586},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5927000045776367},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5350000262260437},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.5254999995231628},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.46860000491142273},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.44609999656677246},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.4368000030517578},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3441999852657318},{"id":"https://openalex.org/C147297375","wikidata":"https://www.wikidata.org/wiki/Q6674930","display_name":"Look-ahead","level":2,"score":0.31360000371932983},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30239999294281006},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.3012999892234802},{"id":"https://openalex.org/C6802819","wikidata":"https://www.wikidata.org/wiki/Q1072174","display_name":"Linear system","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C311688","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Time complexity","level":2,"score":0.2551000118255615}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.12675","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.12675","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.12675","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.12675","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sparse-Linear":[0],"Attention":[1],"(SLA)":[2],"combines":[3],"sparse":[4,33,66,88,109,117],"and":[5,12,61,67,96,110,114,147],"linear":[6,35,68,90,111],"attention":[7,51,84,99,112,120,124,145,151],"to":[8,31,106,130],"accelerate":[9],"diffusion":[10,139],"models":[11],"has":[13],"shown":[14],"strong":[15],"performance":[16],"in":[17,53],"video":[18,138],"generation.":[19],"However,":[20],"(i)":[21],"SLA":[22,60],"relies":[23],"on":[24,38,137],"a":[25,57,62,76,93,103,116],"heuristic":[26],"split":[27],"that":[28,79,101,136],"assigns":[29],"computations":[30],"the":[32,50,108],"or":[34,89],"branch":[36],"based":[37],"attention-weight":[39],"magnitude,":[40],"which":[41,73],"can":[42,142],"be":[43],"suboptimal.":[44],"Additionally,":[45],"(ii)":[46],"after":[47],"formally":[48],"analyzing":[49],"error":[52],"SLA,":[54],"we":[55],"identify":[56],"mismatch":[58],"between":[59],"direct":[63,97],"decomposition":[64],"into":[65],"attention.":[69],"We":[70],"propose":[71],"SLA2,":[72],"introduces":[74],"(I)":[75],"learnable":[77,104],"router":[78],"dynamically":[80],"selects":[81],"whether":[82],"each":[83],"computation":[85],"should":[86],"use":[87],"attention,":[91],"(II)":[92],"more":[94],"faithful":[95],"sparse-linear":[98],"formulation":[100],"uses":[102],"ratio":[105],"combine":[107],"branches,":[113],"(III)":[115],"+":[118],"low-bit":[119,123],"design,":[121],"where":[122],"is":[125],"introduced":[126],"via":[127],"quantization-aware":[128],"fine-tuning":[129],"reduce":[131],"quantization":[132],"error.":[133],"Experiments":[134],"show":[135],"models,":[140],"SLA2":[141],"achieve":[143],"97%":[144],"sparsity":[146],"deliver":[148],"an":[149],"18.6x":[150],"speedup":[152],"while":[153],"preserving":[154],"generation":[155],"quality.":[156]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2026-02-17T00:00:00"}
