{"id":"https://openalex.org/W7128793353","doi":"https://doi.org/10.48550/arxiv.2602.11451","title":"LoopFormer: Elastic-Depth Looped Transformers for Latent Reasoning via Shortcut Modulation","display_name":"LoopFormer: Elastic-Depth Looped Transformers for Latent Reasoning via Shortcut Modulation","publication_year":2026,"publication_date":"2026-02-11","ids":{"openalex":"https://openalex.org/W7128793353","doi":"https://doi.org/10.48550/arxiv.2602.11451"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.11451","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jeddi, Ahmadreza","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jeddi, Ahmadreza","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102861496","display_name":"Marco Ciccone","orcid":"https://orcid.org/0000-0002-3306-1323"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ciccone, Marco","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5089641181","display_name":"B Taati","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taati, Babak","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.43070000410079956,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.43070000410079956,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.12030000239610672,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.06419999897480011,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7077000141143799},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6477000117301941},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.4564000070095062},{"id":"https://openalex.org/keywords/feedback-loop","display_name":"Feedback loop","score":0.32089999318122864},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.30550000071525574},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.30390000343322754}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7077000141143799},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6765999794006348},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6477000117301941},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.4564000070095062},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4242999851703644},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32330000400543213},{"id":"https://openalex.org/C186886427","wikidata":"https://www.wikidata.org/wiki/Q5441213","display_name":"Feedback loop","level":2,"score":0.32089999318122864},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.30550000071525574},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3052999973297119},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.2897999882698059},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.27799999713897705},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.25609999895095825}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.11451","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.11451","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.11451","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.11451","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Looped":[0],"Transformers":[1,169],"have":[2],"emerged":[3],"as":[4],"an":[5,37],"efficient":[6],"and":[7,29,54,124,150,182],"powerful":[8],"class":[9],"of":[10,49,60,99,134],"models":[11,23,63],"for":[12,173],"reasoning":[13,30,151],"in":[14],"the":[15,47,58,121],"language":[16,148,175,185],"domain.":[17],"Recent":[18],"studies":[19],"show":[20,166],"that":[21,33,96,103,167],"these":[22,62],"achieve":[24],"strong":[25],"performance":[26,146],"on":[27,81,120,147],"algorithmic":[28],"tasks,":[31],"suggesting":[32],"looped":[34,78,168],"architectures":[35],"possess":[36],"inductive":[38],"bias":[39],"toward":[40,180],"latent":[41],"reasoning.":[42,87],"However,":[43],"prior":[44],"approaches":[45],"fix":[46],"number":[48],"loop":[50,119],"iterations":[51],"during":[52],"training":[53,94],"inference,":[55],"leaving":[56],"open":[57],"question":[59],"whether":[61],"can":[64],"flexibly":[65],"adapt":[66],"their":[67],"computational":[68],"depth":[69],"under":[70,154],"variable":[71],"compute":[72,156],"budgets.":[73],"We":[74],"introduce":[75],"LoopFormer,":[76],"a":[77,92,178],"Transformer":[79],"trained":[80],"variable-length":[82],"trajectories":[83,98,133],"to":[84,113,129],"enable":[85],"budget-conditioned":[86],"Our":[88],"core":[89],"contribution":[90],"is":[91],"shortcut-consistency":[93],"scheme":[95],"aligns":[97],"different":[100],"lengths,":[101],"ensuring":[102],"shorter":[104],"loops":[105,111],"yield":[106],"informative":[107],"representations":[108,128],"while":[109,158],"longer":[110],"continue":[112],"refine":[114],"them.":[115],"LoopFormer":[116,143],"conditions":[117],"each":[118],"current":[122],"time":[123],"step":[125],"size,":[126],"enabling":[127],"evolve":[130],"consistently":[131],"across":[132],"varying":[135],"length":[136],"rather":[137],"than":[138],"drifting":[139],"or":[140],"stagnating.":[141],"Empirically,":[142],"demonstrates":[144],"robust":[145],"modeling":[149],"benchmarks":[152],"even":[153],"aggressive":[155],"constraints,":[157],"scaling":[159],"gracefully":[160],"with":[161],"additional":[162],"budget.":[163],"These":[164],"results":[165],"are":[170],"inherently":[171],"suited":[172],"adaptive":[174],"modeling,":[176],"opening":[177],"path":[179],"controllable":[181],"budget-aware":[183],"large":[184],"models.":[186]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-14T00:00:00"}
