{"id":"https://openalex.org/W7128779342","doi":"https://doi.org/10.48550/arxiv.2602.12271","title":"MonarchRT: Efficient Attention for Real-Time Video Generation","display_name":"MonarchRT: Efficient Attention for Real-Time Video Generation","publication_year":2026,"publication_date":"2026-02-12","ids":{"openalex":"https://openalex.org/W7128779342","doi":"https://doi.org/10.48550/arxiv.2602.12271"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.12271","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125927420","display_name":"Krish Agarwal","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Agarwal, Krish","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125901146","display_name":"Zhuoming Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhuoming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125982730","display_name":"Cheng Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Cheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125937140","display_name":"Yongqi Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yongqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125964026","display_name":"Haizhong Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Haizhong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125937097","display_name":"Xun Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Xun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001041485","display_name":"Atri Rudra","orcid":"https://orcid.org/0000-0003-4136-4719"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rudra, Atri","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5111249534","display_name":"Beidi Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Beidi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5125927420"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8064000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8064000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.02329999953508377,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.015300000086426735,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.46299999952316284},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4284000098705292},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.40869998931884766},{"id":"https://openalex.org/keywords/quadratic-equation","display_name":"Quadratic equation","score":0.39739999175071716},{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.39590001106262207},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.3874000012874603},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.374099999666214},{"id":"https://openalex.org/keywords/position","display_name":"Position (finance)","score":0.33469998836517334},{"id":"https://openalex.org/keywords/computational-photography","display_name":"Computational photography","score":0.3188000023365021}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8259000182151794},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.46299999952316284},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4345000088214874},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4284000098705292},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.40869998931884766},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4020000100135803},{"id":"https://openalex.org/C129844170","wikidata":"https://www.wikidata.org/wiki/Q41299","display_name":"Quadratic equation","level":2,"score":0.39739999175071716},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.39590001106262207},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.3874000012874603},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38109999895095825},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.374099999666214},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.33469998836517334},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3244999945163727},{"id":"https://openalex.org/C146044194","wikidata":"https://www.wikidata.org/wiki/Q5157334","display_name":"Computational photography","level":4,"score":0.3188000023365021},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.3027999997138977},{"id":"https://openalex.org/C88626702","wikidata":"https://www.wikidata.org/wiki/Q1128903","display_name":"Continuation","level":2,"score":0.3021000027656555},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.29910001158714294},{"id":"https://openalex.org/C81845259","wikidata":"https://www.wikidata.org/wiki/Q290117","display_name":"Quadratic programming","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2915000021457672},{"id":"https://openalex.org/C30814859","wikidata":"https://www.wikidata.org/wiki/Q4119603","display_name":"Video denoising","level":5,"score":0.2818000018596649},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2791999876499176},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.272599995136261},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.26750001311302185},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2621000111103058},{"id":"https://openalex.org/C148047603","wikidata":"https://www.wikidata.org/wiki/Q1014612","display_name":"Parallelizable manifold","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.25}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.12271","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.12271","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.12271","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.12271","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5614630579948425}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Real-time":[0],"video":[1,62,107,200,239],"generation":[2,240],"with":[3,78,144,176,241],"Diffusion":[4],"Transformers":[5],"is":[6,64],"bottlenecked":[7],"by":[8,75],"the":[9,87,138,151,184,224,232],"quadratic":[10],"cost":[11],"of":[12,90,140,154,226],"3D":[13],"self-attention,":[14],"especially":[15],"in":[16,179,223],"real-time":[17,199,238],"regimes":[18],"that":[19,44,61,110,168],"are":[20],"both":[21],"few-step":[22],"and":[23,30,83,121,208,216],"autoregressive,":[24],"where":[25],"errors":[26],"compound":[27],"across":[28],"time":[29],"each":[31],"denoising":[32],"step":[33],"must":[34],"carry":[35],"substantially":[36],"more":[37],"information.":[38],"In":[39],"this":[40,97],"setting,":[41],"we":[42,59,99,127],"find":[43],"prior":[45],"sparse-attention":[46],"approximations":[47],"break":[48],"down,":[49],"despite":[50],"showing":[51],"strong":[52],"results":[53],"for":[54,106,162,198,231],"bidirectional,":[55],"many-step":[56],"diffusion.":[57],"Specifically,":[58],"observe":[60,167],"attention":[63,104,112,174,196],"not":[65],"reliably":[66],"sparse,":[67],"but":[68],"instead":[69],"combines":[70],"pronounced":[71],"periodic":[72],"structure":[73,120],"driven":[74],"spatiotemporal":[76],"position":[77],"dynamic,":[79],"sparse":[80,158,195],"semantic":[81],"correspondences":[82],"dense":[84],"mixing,":[85],"exceeding":[86],"representational":[88],"capacity":[89],"even":[91],"oracle":[92],"top-k":[93],"attention.":[94],"Building":[95],"on":[96,193,211,246],"insight,":[98],"propose":[100],"Monarch-RT,":[101],"a":[102,190,247],"structured":[103],"parameterization":[105,141,197],"diffusion":[108],"models":[109],"factorizes":[111],"using":[113],"Monarch":[114,125],"matrices.":[115],"Through":[116],"appropriately":[117],"aligned":[118],"block":[119],"our":[122],"extended":[123],"tiled":[124],"parameterization,":[126],"achieve":[128,236],"high":[129,152],"expressivity":[130],"while":[131],"preserving":[132],"computational":[133],"efficiency.":[134],"We":[135,148,165],"further":[136,166],"overcome":[137],"overhead":[139],"through":[142],"finetuning,":[143],"custom":[145],"Triton":[146],"kernels.":[147],"first":[149,233],"validate":[150],"efficacy":[153],"Monarch-RT":[155,169,189],"over":[156],"existing":[157],"baselines":[159],"designed":[160],"only":[161],"bidirectional":[163],"models.":[164],"attains":[170],"up":[171],"to":[172,183,235],"95%":[173],"sparsity":[175],"no":[177],"loss":[178],"quality":[180],"when":[181],"applied":[182],"state-of-the-art":[185],"model":[186],"Self-Forcing,":[187],"making":[188],"pioneering":[191],"work":[192],"highly-capable":[194],"generation.":[201],"Our":[202],"optimized":[203],"implementation":[204],"outperforms":[205],"FlashAttention-2,":[206],"FlashAttention-3,":[207],"FlashAttention-4":[209],"kernels":[210],"Nvidia":[212],"RTX":[213,249],"5090,":[214],"H100,":[215],"B200":[217],"GPUs":[218],"respectively,":[219],"providing":[220],"kernel":[221],"speedups":[222],"range":[225],"1.4-11.8X.":[227],"This":[228],"enables":[229],"us,":[230],"time,":[234],"true":[237],"Self-Forcing":[242],"at":[243],"16":[244],"FPS":[245],"single":[248],"5090.":[250]},"counts_by_year":[],"updated_date":"2026-05-03T08:25:01.440150","created_date":"2026-02-14T00:00:00"}
