{"id":"https://openalex.org/W4417449746","doi":"https://doi.org/10.48550/arxiv.2512.12604","title":"No Cache Left Idle: Accelerating diffusion model via Extreme-slimming Caching","display_name":"No Cache Left Idle: Accelerating diffusion model via Extreme-slimming Caching","publication_year":2025,"publication_date":"2025-12-14","ids":{"openalex":"https://openalex.org/W4417449746","doi":"https://doi.org/10.48550/arxiv.2512.12604"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2512.12604","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.12604","pdf_url":"https://arxiv.org/pdf/2512.12604","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.12604","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wen, Tingyan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wen, Tingyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100358581","display_name":"Haoyu Li","orcid":"https://orcid.org/0009-0004-9836-7747"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067474325","display_name":"Yihuang Chen","orcid":"https://orcid.org/0000-0002-2887-6317"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yihuang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101851239","display_name":"Xing Zhou","orcid":"https://orcid.org/0000-0001-6358-1621"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Xing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068103748","display_name":"Lin Zhu","orcid":"https://orcid.org/0000-0003-1167-8274"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Lifei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100737125","display_name":"Xueqian Wang","orcid":"https://orcid.org/0000-0003-3542-0593"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xueqian","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.47110000252723694,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.47110000252723694,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.11150000244379044,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.08619999885559082,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6427000164985657},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.6154000163078308},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5591999888420105},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.525600016117096},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.43230000138282776},{"id":"https://openalex.org/keywords/reset","display_name":"Reset (finance)","score":0.4223000109195709},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.41780000925064087},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.40709999203681946},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.3741999864578247}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7633000016212463},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6427000164985657},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.6154000163078308},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5591999888420105},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.525600016117096},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.43230000138282776},{"id":"https://openalex.org/C2779795794","wikidata":"https://www.wikidata.org/wiki/Q7315343","display_name":"Reset (finance)","level":2,"score":0.4223000109195709},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.41780000925064087},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.40709999203681946},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.40400001406669617},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4034999907016754},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3871999979019165},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.3741999864578247},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.3714999854564667},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3684000074863434},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3522999882698059},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.3402000069618225},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.3384999930858612},{"id":"https://openalex.org/C177225278","wikidata":"https://www.wikidata.org/wiki/Q192674","display_name":"Factoring","level":2,"score":0.336899995803833},{"id":"https://openalex.org/C76752949","wikidata":"https://www.wikidata.org/wiki/Q7607499","display_name":"Stencil","level":2,"score":0.32440000772476196},{"id":"https://openalex.org/C96147967","wikidata":"https://www.wikidata.org/wiki/Q190686","display_name":"Subroutine","level":2,"score":0.29589998722076416},{"id":"https://openalex.org/C2778514511","wikidata":"https://www.wikidata.org/wiki/Q1374194","display_name":"Programmer","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.2816999852657318},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.271699994802475},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2558000087738037},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.25279998779296875},{"id":"https://openalex.org/C2778583558","wikidata":"https://www.wikidata.org/wiki/Q771245","display_name":"Code reuse","level":3,"score":0.2524000108242035},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.25130000710487366}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2512.12604","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.12604","pdf_url":"https://arxiv.org/pdf/2512.12604","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2512.12604","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.12604","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.12604","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.12604","pdf_url":"https://arxiv.org/pdf/2512.12604","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diffusion":[0],"models":[1],"achieve":[2],"remarkable":[3],"generative":[4],"quality,":[5],"but":[6,40,56],"computational":[7,59],"overhead":[8],"scales":[9],"with":[10,179],"step":[11],"count,":[12],"model":[13],"depth,":[14],"and":[15,87,125,133,154,168,177,189],"sequence":[16],"length.":[17],"Feature":[18],"caching":[19,102],"is":[20,54,74,141],"effective":[21],"since":[22],"adjacent":[23],"timesteps":[24],"yield":[25],"highly":[26],"similar":[27],"features.":[28],"However,":[29],"an":[30,117],"inherent":[31],"trade-off":[32],"remains:":[33],"aggressive":[34],"timestep":[35,113],"reuse":[36,53,110],"offers":[37],"large":[38],"speedups":[39],"can":[41],"easily":[42],"cross":[43],"the":[44,75,112,130,138,163],"critical":[45,139],"line,":[46,119],"hurting":[47],"fidelity,":[48],"while":[49],"block-":[50,124],"or":[51],"token-level":[52,126],"safer":[55],"yields":[57],"limited":[58],"savings.":[60],"We":[61],"present":[62],"X-Slim":[63,95,161],"(eXtreme-Slimming":[64],"Caching),":[65],"a":[66,97,104],"training-free,":[67],"cache-based":[68],"accelerator":[69],"that,":[70],"to":[71,79,116,122,128,143,156,175],"our":[72],"knowledge,":[73],"first":[76,108],"unified":[77],"framework":[78],"exploit":[80],"cacheable":[81],"redundancy":[82],"across":[83],"timesteps,":[84],"structure":[85],"(blocks),":[86],"space":[88],"(tokens).":[89],"Rather":[90],"than":[91],"simply":[92],"mixing":[93],"levels,":[94],"introduces":[96],"dual-threshold":[98],"controller":[99],"that":[100],"turns":[101],"into":[103],"push-then-polish":[105],"process:":[106],"it":[107,170,185],"pushes":[109],"at":[111],"level":[114],"up":[115,174],"early-warning":[118],"then":[120],"switches":[121],"lightweight":[123],"refresh":[127],"polish":[129],"remaining":[131],"redundancy,":[132],"triggers":[134],"full":[135],"inference":[136],"once":[137],"line":[140],"crossed":[142],"reset":[144],"accumulated":[145],"error.":[146],"At":[147],"each":[148],"level,":[149],"context-aware":[150],"indicators":[151],"decide":[152],"when":[153],"where":[155],"cache.":[157],"Across":[158],"diverse":[159],"tasks,":[160],"advances":[162],"speed-quality":[164],"frontier.":[165],"On":[166,183],"FLUX.1-dev":[167],"HunyuanVideo,":[169],"reduces":[171],"latency":[172],"by":[173,192],"4.97x":[176],"3.52x":[178],"minimal":[180],"perceptual":[181],"loss.":[182],"DiT-XL/2,":[184],"reaches":[186],"3.13x":[187],"acceleration":[188],"improves":[190],"FID":[191],"2.42":[193],"over":[194],"prior":[195],"methods.":[196]},"counts_by_year":[],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-12-17T00:00:00"}
