{"id":"https://openalex.org/W7134273919","doi":"https://doi.org/10.48550/arxiv.2603.06492","title":"NOBLE: Accelerating Transformers with Nonlinear Low-Rank Branches","display_name":"NOBLE: Accelerating Transformers with Nonlinear Low-Rank Branches","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7134273919","doi":"https://doi.org/10.48550/arxiv.2603.06492"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.06492","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125080717","display_name":"Ethan Smith","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105067","display_name":"Canon Anelva (Japan)","ror":"https://ror.org/01jfy6x14","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210105067"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Smith, Ethan","raw_affiliation_strings":["Canva Research"],"affiliations":[{"raw_affiliation_string":"Canva Research","institution_ids":["https://openalex.org/I4210105067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5125080717"],"corresponding_institution_ids":["https://openalex.org/I4210105067"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.42559999227523804,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.42559999227523804,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.2484000027179718,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.07280000299215317,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.6266000270843506},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.47690001130104065},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4611000120639801},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4278999865055084},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.36230000853538513},{"id":"https://openalex.org/keywords/trigonometric-functions","display_name":"Trigonometric functions","score":0.35920000076293945}],"concepts":[{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.6266000270843506},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5223000049591064},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.47690001130104065},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4611000120639801},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4535999894142151},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4278999865055084},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.36230000853538513},{"id":"https://openalex.org/C178009071","wikidata":"https://www.wikidata.org/wiki/Q93344","display_name":"Trigonometric functions","level":2,"score":0.35920000076293945},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.35580000281333923},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.35280001163482666},{"id":"https://openalex.org/C176605952","wikidata":"https://www.wikidata.org/wiki/Q827674","display_name":"Phasor","level":4,"score":0.3434000015258789},{"id":"https://openalex.org/C194051981","wikidata":"https://www.wikidata.org/wiki/Q1337691","display_name":"Economic shortage","level":3,"score":0.2989000082015991},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.27720001339912415},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25279998779296875}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.06492","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.06492","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.06492","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.06492","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,67,157],"introduce":[1],"NOBLE":[2,29,98,198],"(Nonlinear":[3],"lOw-rank":[4],"Branch":[5],"for":[6,32,50],"Linear":[7],"Enhancement),":[8],"an":[9,48],"architectural":[10],"augmentation":[11,162],"that":[12,74,189],"adds":[13],"nonlinear":[14],"low-rank":[15],"branches":[16],"to":[17,47,106,110,116,136,193],"transformer":[18],"linear":[19,87],"layers.":[20],"Unlike":[21],"LoRA":[22],"and":[23,72,83,128,145,150],"other":[24,172],"parameter-efficient":[25],"fine-tuning":[26],"(PEFT)":[27],"methods,":[28],"is":[30,38,63,183],"designed":[31],"pretraining":[33],"from":[34],"scratch.":[35],"The":[36,57],"branch":[37,58],"a":[39,64,76,86],"permanent":[40],"part":[41],"of":[42,54,205],"the":[43,93,194,206],"architecture":[44],"as":[45,122,124],"opposed":[46],"adapter":[49],"finetuning":[51],"on":[52,142],"top":[53],"frozen":[55],"weights.":[56],"computes":[59],"\u03c3(xWdown)Wup":[60],"where":[61],"\u03c3":[62],"learnable":[65,81],"nonlinearity.":[66],"evaluate":[68],"several":[69],"activation":[70],"functions":[71],"find":[73],"CosNet,":[75],"two-layer":[77],"cosine":[78],"nonlinearity":[79],"with":[80,85,102,121,164,171],"frequency":[82],"phase":[84],"projection":[88],"in":[89,92,134,167,202],"between":[90],"them":[91],"bottleneck":[94],"space,":[95],"performs":[96],"best.":[97],"achieves":[99],"substantial":[100],"improvements":[101],"minimal":[103],"overhead:":[104],"up":[105,135],"1.47x":[107],"step":[108,130],"speedup":[109],"reach":[111],"baseline":[112],"eval":[113],"loss":[114],"(up":[115],"32%":[117],"fewer":[118],"training":[119,155],"steps),":[120],"low":[123],"4%":[125],"additional":[126],"parameters":[127],"7%":[129],"time":[131],"overhead,":[132],"resulting":[133],"1.22x":[137],"net":[138],"wallclock":[139],"speedup.":[140],"Experiments":[141],"LLMs":[143],"(250M":[144],"1.5B":[146],"parameters),":[147],"BERT,":[148],"VQGAN,":[149],"ViT":[151,178],"consistently":[152],"show":[153],"improved":[154],"efficiency.":[156],"identify":[158],"one":[159],"caveat:":[160],"Mixup/CutMix":[161],"interferes":[163],"NOBLE's":[165],"benefits":[166],"Imagenet":[168],"classification":[169],"along":[170],"stochastic":[173],"augmentations,":[174],"but":[175],"when":[176],"disabled,":[177],"also":[179],"improves.":[180],"This":[181],"discrepancy":[182],"possibly":[184],"explained":[185],"by":[186],"regularization":[187],"techniques":[188],"encourage":[190],"smoother":[191],"fits":[192],"target":[195,207],"function":[196],"while":[197],"may":[199],"specialize":[200],"more":[201],"sharper":[203],"aspects":[204],"function.":[208]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-10T00:00:00"}
