{"id":"https://openalex.org/W7134803607","doi":"https://doi.org/10.48550/arxiv.2603.06875","title":"Stochastic Attention via Langevin Dynamics on the Modern Hopfield Energy","display_name":"Stochastic Attention via Langevin Dynamics on the Modern Hopfield Energy","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7134803607","doi":"https://doi.org/10.48550/arxiv.2603.06875"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.06875","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.06875","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.06875","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128677132","display_name":"Abdulrahman Alswaidan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Alswaidan, Abdulrahman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5003161155","display_name":"Jeffrey D. Varner","orcid":"https://orcid.org/0000-0002-2558-7026"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Varner, Jeffrey D.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5128677132"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7886999845504761,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7886999845504761,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.026399999856948853,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12056","display_name":"Markov Chains and Monte Carlo Methods","score":0.023600000888109207,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/langevin-dynamics","display_name":"Langevin dynamics","score":0.6876999735832214},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.5939000248908997},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.5878000259399414},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5752999782562256},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.5221999883651733},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4862000048160553},{"id":"https://openalex.org/keywords/stochastic-gradient-descent","display_name":"Stochastic gradient descent","score":0.4505000114440918},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.4083999991416931}],"concepts":[{"id":"https://openalex.org/C2780004032","wikidata":"https://www.wikidata.org/wiki/Q6485978","display_name":"Langevin dynamics","level":2,"score":0.6876999735832214},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.5939000248908997},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.5878000259399414},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5752999782562256},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.5221999883651733},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.4934999942779541},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49000000953674316},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4862000048160553},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.4505000114440918},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4083999991416931},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.4023999869823456},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.365200012922287},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.364300012588501},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.351500004529953},{"id":"https://openalex.org/C98951983","wikidata":"https://www.wikidata.org/wiki/Q7604341","display_name":"Stationary distribution","level":3,"score":0.35120001435279846},{"id":"https://openalex.org/C137109543","wikidata":"https://www.wikidata.org/wiki/Q554388","display_name":"Temperature gradient","level":2,"score":0.35089999437332153},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3483999967575073},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3411000072956085},{"id":"https://openalex.org/C2777577648","wikidata":"https://www.wikidata.org/wiki/Q584537","display_name":"Langevin equation","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.30970001220703125},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2964000105857849},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2806999981403351},{"id":"https://openalex.org/C8272713","wikidata":"https://www.wikidata.org/wiki/Q176737","display_name":"Stochastic process","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2630000114440918}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.06875","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.06875","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.06875","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.06875","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.8981070518493652}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Attention":[0],"heads":[1],"retrieve:":[2],"given":[3],"a":[4,8,26,41,46,80,96,140],"query,":[5],"they":[6],"return":[7],"softmax-weighted":[9],"average":[10],"of":[11,22],"stored":[12],"values.":[13],"We":[14,78,103],"show":[15],"that":[16,31,85,174,182],"this":[17],"computation":[18],"is":[19,76,118],"one":[20],"step":[21],"gradient":[23,63],"descent":[24],"on":[25,105,134],"classical":[27],"energy":[28,62],"function,":[29],"and":[30,122],"Langevin":[32],"sampling":[33],"from":[34,147,205],"the":[35,50,61,65,87,127,135,148,152,163,175,215],"corresponding":[36],"distribution":[37],"yields":[38],"stochastic":[39,116],"attention:":[40],"training-free":[42,176],"sampler":[43],"controlled":[44],"by":[45],"single":[47],"temperature.":[48],"Lowering":[49],"temperature":[51,90],"gives":[52,57],"exact":[53],"retrieval;":[54],"raising":[55],"it":[56],"open-ended":[58],"generation.":[59],"Because":[60],"equals":[64],"attention":[66,117,217],"map,":[67],"no":[68,211],"score":[69,177],"network,":[70],"training":[71],"loop,":[72],"or":[73],"learned":[74,129,183],"model":[75],"required.":[77],"derive":[79],"closed-form":[81],"entropy":[82],"inflection":[83],"condition":[84],"identifies":[86],"retrieval-to-generation":[88],"transition":[89],"for":[91,100],"any":[92],"memory":[93,194],"geometry,":[94],"with":[95],"scaling":[97],"law":[98],"$\u03b2^*\\!\\sim\\!\\sqrt{d}$":[99],"random":[101],"patterns.":[102],"validate":[104],"five":[106],"domains":[107],"(64":[108],"to":[109,200,214],"4,096":[110],"dimensions).":[111],"On":[112,144],"MNIST":[113],"digit":[114],"images,":[115],"$2.6{\\times}$":[119],"more":[120,124],"novel":[121],"$2.0{\\times}$":[123],"diverse":[125],"than":[126,162],"best":[128],"baseline":[130,189],"(a":[131],"VAE":[132,164],"trained":[133],"same":[136],"patterns),":[137],"while":[138],"matching":[139],"Metropolis-corrected":[141],"gold":[142],"standard.":[143],"protein":[145],"sequences":[146],"Pfam":[149],"RRM":[150],"family,":[151],"generation":[153],"regime":[154],"achieves":[155],"$6.9{\\times}$":[156],"lower":[157],"amino":[158],"acid":[159],"composition":[160],"divergence":[161],"(KL":[165],"$=":[166],"0.060$":[167],"vs.\\":[168],"$0.416$)":[169],"at":[170],"matched":[171],"novelty,":[172],"demonstrating":[173],"function":[178],"preserves":[179],"family-level":[180],"fidelity":[181],"models":[184],"lose.":[185],"A":[186],"denoising":[187],"diffusion":[188],"(DDPM)":[190],"fails":[191],"across":[192],"all":[193],"sizes":[195],"tested":[196],"($K":[197],"=":[198],"100$":[199],"$3{,}500$),":[201],"producing":[202],"samples":[203],"indistinguishable":[204],"isotropic":[206],"noise.":[207],"The":[208],"approach":[209],"requires":[210],"architectural":[212],"changes":[213],"underlying":[216],"mechanism.":[218]},"counts_by_year":[],"updated_date":"2026-03-13T14:20:09.374765","created_date":"2026-03-11T00:00:00"}
