{"id":"https://openalex.org/W4417095160","doi":"https://doi.org/10.48550/arxiv.2511.20698","title":"On the Role of Hidden States of Modern Hopfield Network in Transformer","display_name":"On the Role of Hidden States of Modern Hopfield Network in Transformer","publication_year":2025,"publication_date":"2025-11-24","ids":{"openalex":"https://openalex.org/W4417095160","doi":"https://doi.org/10.48550/arxiv.2511.20698"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2511.20698","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.20698","pdf_url":"https://arxiv.org/pdf/2511.20698","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2511.20698","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120696475","display_name":"Tsubasa Masumura","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Masumura, Tsubasa","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5084053028","display_name":"Masato Taki","orcid":"https://orcid.org/0000-0002-5375-7862"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taki, Masato","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5120696475"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.22519999742507935,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.22519999742507935,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.09600000083446503,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11094","display_name":"Face Recognition and Perception","score":0.08829999715089798,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hopfield-network","display_name":"Hopfield network","score":0.8709999918937683},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7587000131607056},{"id":"https://openalex.org/keywords/bidirectional-associative-memory","display_name":"Bidirectional associative memory","score":0.6389999985694885},{"id":"https://openalex.org/keywords/content-addressable-memory","display_name":"Content-addressable memory","score":0.5169000029563904},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4320000112056732},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4041999876499176}],"concepts":[{"id":"https://openalex.org/C46421273","wikidata":"https://www.wikidata.org/wiki/Q1407668","display_name":"Hopfield network","level":3,"score":0.8709999918937683},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7587000131607056},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6622999906539917},{"id":"https://openalex.org/C120620853","wikidata":"https://www.wikidata.org/wiki/Q506355","display_name":"Bidirectional associative memory","level":4,"score":0.6389999985694885},{"id":"https://openalex.org/C53442348","wikidata":"https://www.wikidata.org/wiki/Q745101","display_name":"Content-addressable memory","level":3,"score":0.5169000029563904},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47110000252723694},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4320000112056732},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4041999876499176},{"id":"https://openalex.org/C159423971","wikidata":"https://www.wikidata.org/wiki/Q177251","display_name":"Associative property","level":2,"score":0.3230000138282776},{"id":"https://openalex.org/C2778618852","wikidata":"https://www.wikidata.org/wiki/Q1128613","display_name":"Content-addressable storage","level":4,"score":0.31459999084472656},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2919999957084656},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28769999742507935},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2511.20698","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.20698","pdf_url":"https://arxiv.org/pdf/2511.20698","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2511.20698","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.20698","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2511.20698","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.20698","pdf_url":"https://arxiv.org/pdf/2511.20698","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Associative":[0],"memory":[1,21],"models":[2],"based":[3,9],"on":[4,10],"Hopfield":[5,39,78,109,190],"networks":[6,79,191],"and":[7,63,69,80,143,160],"self-attention":[8,51],"key-value":[11],"mechanisms":[12,22],"have":[13],"been":[14,28],"popular":[15],"approaches":[16],"in":[17,23,42,47,85,188],"the":[18,32,37,43,50,65,75,95,100,113,119,123,126,132,177,199],"study":[19],"of":[20,36,53,115,122,134,153],"deep":[24,154],"learning.":[25],"It":[26],"has":[27],"pointed":[29],"out":[30],"that":[31,74,145,166],"state":[33,97],"update":[34],"rule":[35],"modern":[38,108],"network":[40],"(MHN)":[41],"adiabatic":[44],"approximation":[45,62],"is":[46],"agreement":[48],"with":[49],"layer":[52,121],"Transformer.":[54],"In":[55,137],"this":[56,61],"paper,":[57],"we":[58,139],"go":[59],"beyond":[60],"investigate":[64],"relationship":[66],"between":[67,77],"MHN":[68],"self-attention.":[70,103],"Our":[71,182],"results":[72,183],"show":[73,140],"correspondence":[76],"Transformers":[81,155],"can":[82,168,192],"be":[83,193],"established":[84],"a":[86,92,185,194],"more":[87],"generalized":[88],"form":[89],"by":[90],"adding":[91,173],"new":[93,105,186],"variable,":[94],"hidden":[96,147],"derived":[98],"from":[99,118],"MHN,":[101],"to":[102,125,176],"This":[104],"attention":[106,110,116,135],"mechanism,":[107],"(MHA),":[111],"allows":[112],"inheritance":[114],"scores":[117],"input":[120],"Transformer":[124,179,200],"output":[127],"layer,":[128],"which":[129,189],"greatly":[130],"improves":[131],"nature":[133],"weights.":[136],"particular,":[138],"both":[141],"theoretically":[142],"empirically":[144],"MHA":[146,167],"states":[148],"significantly":[149],"improve":[150,170],"serious":[151],"problem":[152],"known":[156],"as":[157],"rank":[158],"collapse":[159],"token":[161],"uniformity.":[162],"We":[163],"also":[164],"confirm":[165],"systematically":[169],"accuracy":[171],"without":[172],"training":[174],"parameters":[175],"Vision":[178],"or":[180],"GPT.":[181],"provide":[184],"case":[187],"useful":[195],"perspective":[196],"for":[197],"improving":[198],"architecture.":[201]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-28T00:00:00"}
