{"id":"https://openalex.org/W4409754712","doi":"https://doi.org/10.1007/s10994-025-06768-3","title":"Drop-in efficient self-attention approximation method","display_name":"Drop-in efficient self-attention approximation method","publication_year":2025,"publication_date":"2025-04-24","ids":{"openalex":"https://openalex.org/W4409754712","doi":"https://doi.org/10.1007/s10994-025-06768-3"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-025-06768-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06768-3","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06768-3.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06768-3.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109070042","display_name":"Damien Fran\u00e7ois","orcid":null},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]},{"id":"https://openalex.org/I4210134190","display_name":"Recherches Scientifiques Luxembourg","ror":"https://ror.org/0333e3w09","country_code":"LU","type":"facility","lineage":["https://openalex.org/I4210134190"]}],"countries":["LU"],"is_corresponding":true,"raw_author_name":"Damien Fran\u00e7ois","raw_affiliation_strings":["SnT, University of Luxembourg, 6 Rue Richard Coudenhove-Kalergi, 1359 Kirchberg, Luxembourg, Luxembourg"],"affiliations":[{"raw_affiliation_string":"SnT, University of Luxembourg, 6 Rue Richard Coudenhove-Kalergi, 1359 Kirchberg, Luxembourg, Luxembourg","institution_ids":["https://openalex.org/I4210134190","https://openalex.org/I186903577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095695536","display_name":"Mathis Saillot","orcid":"https://orcid.org/0000-0002-3107-3128"},"institutions":[{"id":"https://openalex.org/I1288283403","display_name":"Georgia Tech Lorraine","ror":"https://ror.org/02m5dy175","country_code":"FR","type":"education","lineage":["https://openalex.org/I1288283403"]},{"id":"https://openalex.org/I4387155471","display_name":"Laboratoire de G\u00e9nie Informatique, de Production et de Maintenance","ror":"https://ror.org/04xykxd72","country_code":null,"type":"facility","lineage":["https://openalex.org/I4210150706","https://openalex.org/I4387155471","https://openalex.org/I90183372","https://openalex.org/I90183372"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Mathis Saillot","raw_affiliation_strings":["LGIPM, Universit\u00e9 de Lorraine, 3 rue Augustin Fresnel,  57070, Metz, France"],"affiliations":[{"raw_affiliation_string":"LGIPM, Universit\u00e9 de Lorraine, 3 rue Augustin Fresnel,  57070, Metz, France","institution_ids":["https://openalex.org/I1288283403","https://openalex.org/I4387155471"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040326968","display_name":"Jacques Klein","orcid":"https://orcid.org/0000-0003-4052-475X"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]},{"id":"https://openalex.org/I4210134190","display_name":"Recherches Scientifiques Luxembourg","ror":"https://ror.org/0333e3w09","country_code":"LU","type":"facility","lineage":["https://openalex.org/I4210134190"]}],"countries":["LU"],"is_corresponding":false,"raw_author_name":"Jacques Klein","raw_affiliation_strings":["SnT, University of Luxembourg, 6 Rue Richard Coudenhove-Kalergi, 1359 Kirchberg, Luxembourg, Luxembourg"],"affiliations":[{"raw_affiliation_string":"SnT, University of Luxembourg, 6 Rue Richard Coudenhove-Kalergi, 1359 Kirchberg, Luxembourg, Luxembourg","institution_ids":["https://openalex.org/I4210134190","https://openalex.org/I186903577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082835974","display_name":"Tegawend\u00e9 F. Bissyand\u00e9","orcid":"https://orcid.org/0000-0001-7270-9869"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]},{"id":"https://openalex.org/I4210134190","display_name":"Recherches Scientifiques Luxembourg","ror":"https://ror.org/0333e3w09","country_code":"LU","type":"facility","lineage":["https://openalex.org/I4210134190"]}],"countries":["LU"],"is_corresponding":false,"raw_author_name":"Tegawend\u00e9 F. Bissyand\u00e9","raw_affiliation_strings":["SnT, University of Luxembourg, 6 Rue Richard Coudenhove-Kalergi, 1359 Kirchberg, Luxembourg, Luxembourg"],"affiliations":[{"raw_affiliation_string":"SnT, University of Luxembourg, 6 Rue Richard Coudenhove-Kalergi, 1359 Kirchberg, Luxembourg, Luxembourg","institution_ids":["https://openalex.org/I4210134190","https://openalex.org/I186903577"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055431881","display_name":"Alexander Skupin","orcid":"https://orcid.org/0000-0002-8955-8304"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]},{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["LU","US"],"is_corresponding":false,"raw_author_name":"Alexander Skupin","raw_affiliation_strings":["Department of Neurosciences, University of California San Diego, 9500 Gilman Drive, 92093, La Jolla, California, USA","Department of Physics and Materials Science, University of Luxembourg,  162A Av. de la Faiencerie,  1511, Luxembourg, Luxembourg","LCSB, University of Luxembourg, 6 Av. du Swing, 4367, Belvaux, Luxembourg"],"affiliations":[{"raw_affiliation_string":"Department of Neurosciences, University of California San Diego, 9500 Gilman Drive, 92093, La Jolla, California, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Department of Physics and Materials Science, University of Luxembourg,  162A Av. de la Faiencerie,  1511, Luxembourg, Luxembourg","institution_ids":["https://openalex.org/I186903577"]},{"raw_affiliation_string":"LCSB, University of Luxembourg, 6 Av. du Swing, 4367, Belvaux, Luxembourg","institution_ids":["https://openalex.org/I186903577"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5109070042"],"corresponding_institution_ids":["https://openalex.org/I186903577","https://openalex.org/I4210134190"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0375721,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"114","issue":"6","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.45226752758026123},{"id":"https://openalex.org/keywords/drop","display_name":"Drop (telecommunication)","score":0.43319591879844666},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3515186607837677}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45226752758026123},{"id":"https://openalex.org/C2781345722","wikidata":"https://www.wikidata.org/wiki/Q5308388","display_name":"Drop (telecommunication)","level":2,"score":0.43319591879844666},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3515186607837677},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/s10994-025-06768-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06768-3","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06768-3.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},{"id":"pmh:oai:orbilu.uni.lu:10993/65010","is_oa":true,"landing_page_url":"https://orbilu.uni.lu/handle/10993/65010","pdf_url":null,"source":{"id":"https://openalex.org/S4306401815","display_name":"Open Repository and Bibliography (University of Luxembourg)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I186903577","host_organization_name":"University of Luxembourg","host_organization_lineage":["https://openalex.org/I186903577"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Machine Learning, 114 (6) (2025-04-25)","raw_type":"peer reviewed"},{"id":"pmh:oai:HAL:hal-05289407v1","is_oa":true,"landing_page_url":"https://hal.science/hal-05289407","pdf_url":null,"source":{"id":"https://openalex.org/S4406922466","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning, 2025, 114 (6), pp.139. &#x27E8;10.1007/s10994-025-06768-3&#x27E9;","raw_type":"Journal articles"}],"best_oa_location":{"id":"doi:10.1007/s10994-025-06768-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06768-3","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06768-3.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320310700","display_name":"Universit\u00e9 du Luxembourg","ror":"https://ror.org/036x5ad56"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409754712.pdf","grobid_xml":"https://content.openalex.org/works/W4409754712.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W1974511160","https://openalex.org/W1998269045","https://openalex.org/W2117539524","https://openalex.org/W2178628967","https://openalex.org/W2914105075","https://openalex.org/W2954698171","https://openalex.org/W2963393721","https://openalex.org/W2964110616","https://openalex.org/W3034573343","https://openalex.org/W3094502228","https://openalex.org/W3106298483","https://openalex.org/W3118608800","https://openalex.org/W3131922516","https://openalex.org/W3173365702","https://openalex.org/W4281758439","https://openalex.org/W4310514254","https://openalex.org/W4384648639","https://openalex.org/W4385768058","https://openalex.org/W6676984168","https://openalex.org/W6739901393","https://openalex.org/W6763509872","https://openalex.org/W6771626834","https://openalex.org/W6779163297","https://openalex.org/W6785783668","https://openalex.org/W6796753453","https://openalex.org/W6843425716"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Abstract":[0],"Transformers":[1,84,178,192],"have":[2,13,157,163,171],"achieved":[3],"state-of-the-art":[4],"performance":[5,273],"in":[6,46,197,229,266,274],"most":[7],"common":[8],"tasks":[9,260],"to":[10,20,30,57,72,201,251,279],"which":[11],"they":[12],"been":[14],"applied.":[15],"Those":[16],"achievements":[17],"are":[18,102],"attributed":[19],"the":[21,32,110,114,222],"Self-Attention":[22,27,52,61,154,219,243],"mechanism":[23],"at":[24],"their":[25,135],"core.":[26],"is":[28,235],"understood":[29],"map":[31],"relationship":[33],"between":[34],"tokens":[35],"of":[36,77,96,113,134,153],"any":[37],"given":[38],"sequence.":[39],"This":[40,226],"exhaustive":[41],"mapping":[42],"incurs":[43],"massive":[44],"costs":[45],"memory":[47,68,79,160,267],"and":[48,67,80,92,145,162,205,254,261],"inference":[49,206],"time,":[50],"as":[51,86,237,269,271],"scales":[53],"quadratically":[54],"with":[55,137,244],"regard":[56],"sequence":[58],"length.":[59],"Standard":[60],"has":[62],"required":[63],"increasingly":[64],"large":[65],"compute":[66,166],"usage":[69],"when":[70],"applied":[71,196],"long":[73],"input":[74],"sequences":[75],"because":[76,133],"this":[78,209],"time":[81],"bottleneck.":[82],"Efficient":[83,177,191,255],"emerged":[85],"performant":[87],"alternatives":[88],"demonstrating":[89],"good":[90],"scalability":[91],"occasionally":[93],"better":[94],"tracking":[95],"long-range":[97],"dependencies.":[98],"Their":[99],"efficiency":[100],"gains":[101],"obtained":[103],"through":[104,117],"different":[105],"methods,":[106],"usually":[107],"focusing":[108],"on":[109,183,258],"linear":[111,215],"scaling":[112],"attention":[115],"matrix":[116],"sparsification,":[118],"approximation,":[119],"or":[120,185],"other":[121,146],"methods.":[122,281],"Among":[123],"existing":[124],"approaches,":[125],"those":[126],"using":[127],"low-rank":[128],"approximation":[129,216,224],"present":[130],"particular":[131],"advantages":[132],"compatibility":[136,174],"standard":[138,242,252],"Self-Attention-based":[139],"models,":[140],"allowing":[141],"for":[142,181,218,241],"weight":[143],"transfers":[144],"time-saving":[147],"schemes.":[148],"More":[149],"recently,":[150],"hardware-aware":[151,169,199],"versions":[152,231],"(e.g.,":[155],"FlashAttention)":[156],"mitigated":[158],"all":[159],"bottlenecks":[161],"alleviated":[164],"its":[165],"burden.":[167],"Unfortunately,":[168],"Self-Attentions":[170],"stricter":[172],"hardware":[173],"requirements":[175],"making":[176],"still":[179],"relevant":[180],"use":[182],"older":[184],"less":[186],"powerful":[187],"hardware.":[188],"Furthermore,":[189],"some":[190],"can":[193],"even":[194,277],"be":[195],"an":[198],"manner":[200],"further":[202],"improve":[203],"training":[204,275],"speed.":[207],"In":[208],"paper,":[210],"we":[211],"propose":[212],"a":[213,238,263],"novel":[214],"method":[217,248],"inspired":[220],"by":[221],"CUR":[223],"method.":[225],"method,":[227],"proposed":[228],"two":[230],"(one":[232],"leveraging":[233],"FlashAttention),":[234],"conceived":[236],"drop-in":[239],"replacement":[240],"weights":[245],"compatibility.":[246],"Our":[247],"compares":[249],"favorably":[250],"Transformers\u2019":[253,256],"performances":[257],"varied":[259],"demonstrates":[262],"significant":[264],"decrease":[265],"footprint":[268],"well":[270],"competitive":[272],"speed,":[276],"compared":[278],"similar":[280]},"counts_by_year":[],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
