{"id":"https://openalex.org/W7124152610","doi":"https://doi.org/10.48550/arxiv.2601.08131","title":"Attention Projection Mixing with Exogenous Anchors","display_name":"Attention Projection Mixing with Exogenous Anchors","publication_year":2026,"publication_date":"2026-01-13","ids":{"openalex":"https://openalex.org/W7124152610","doi":"https://doi.org/10.48550/arxiv.2601.08131"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.08131","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.08131","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.08131","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111609459","display_name":"Jonathan K. Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Su, Jonathan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5111609459"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.1671999990940094,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.1671999990940094,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.1315000057220459,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.09009999781847,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mixing","display_name":"Mixing (physics)","score":0.6269999742507935},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.5748999714851379},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.5715000033378601},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5701000094413757},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5465999841690063},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.5174999833106995},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5101000070571899},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4523000121116638}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6378999948501587},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing (physics)","level":2,"score":0.6269999742507935},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.5748999714851379},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.5715000033378601},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5701000094413757},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5465999841690063},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.5174999833106995},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5101000070571899},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4553999900817871},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4523000121116638},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37279999256134033},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3700000047683716},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.35670000314712524},{"id":"https://openalex.org/C2780551164","wikidata":"https://www.wikidata.org/wiki/Q2306599","display_name":"Column (typography)","level":3,"score":0.3246000111103058},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2922999858856201},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.28220000863075256},{"id":"https://openalex.org/C186068551","wikidata":"https://www.wikidata.org/wiki/Q13255585","display_name":"Tension (geology)","level":3,"score":0.27959999442100525},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26350000500679016},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25440001487731934}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.08131","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.08131","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.08131","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.08131","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Cross-layer":[0],"reuse":[1],"of":[2,47],"early":[3],"attention":[4],"projections":[5,61],"can":[6],"improve":[7],"optimization":[8],"and":[9,33,79,90,92,111,156],"data":[10],"efficiency,":[11],"but":[12],"it":[13],"creates":[14],"a":[15,25,69],"structural":[16],"conflict:":[17],"the":[18,45,55,63,112],"first":[19],"layer":[20,65],"must":[21],"simultaneously":[22],"act":[23],"as":[24,34],"stable,":[26],"reusable":[27],"anchor":[28,60,97],"for":[29],"all":[30],"deeper":[31],"layers":[32,146],"an":[35,136],"effective":[36],"computational":[37],"block.":[38],"We":[39,50,67,131,153],"demonstrate":[40],"that":[41,74,95],"this":[42,133],"tension":[43],"constrains":[44],"performance":[46],"internal-anchor":[48,109],"designs.":[49],"propose":[51],"ExoFormer,":[52],"which":[53],"resolves":[54],"conflict":[56],"by":[57],"learning":[58],"exogenous":[59],"outside":[62],"sequential":[64],"stack.":[66],"introduce":[68],"unified":[70],"normalized":[71],"mixing":[72],"framework":[73],"mixes":[75],"queries,":[76],"keys,":[77],"values,":[78],"gate":[80],"logits":[81],"using":[82,124],"learnable":[83],"coefficients":[84],"(exploring":[85],"coefficient":[86],"granularities:":[87],"elementwise,":[88],"headwise,":[89],"scalar),":[91],"we":[93],"show":[94],"normalizing":[96],"sources":[98],"is":[99],"key":[100],"to":[101,147,158],"stable":[102],"reuse.":[103],"ExoFormer":[104],"variants":[105],"consistently":[106],"outperform":[107],"their":[108],"counterparts,":[110],"dynamic":[113],"variant":[114],"yields":[115],"1.5x":[116,125],"downstream":[117],"accuracy":[118],"points":[119],"while":[120],"matching":[121],"validation":[122],"loss":[123],"fewer":[126],"tokens":[127],"than":[128],"Gated":[129],"Attention.":[130],"explain":[132],"efficacy":[134],"via":[135],"Offloading":[137],"Hypothesis:":[138],"external":[139],"anchors":[140],"preserve":[141],"essential":[142],"token":[143],"identity,":[144],"allowing":[145],"specialize":[148],"exclusively":[149],"in":[150],"feature":[151],"transformation.":[152],"release":[154],"code":[155],"models":[157],"facilitate":[159],"future":[160],"research.":[161]},"counts_by_year":[],"updated_date":"2026-01-30T23:17:42.513302","created_date":"2026-01-15T00:00:00"}
