{"id":"https://openalex.org/W7164818465","doi":"https://doi.org/10.1145/3805622.3810814","title":"StepVAR: Structure-Texture Guided Pruning for Visual Autoregressive Models","display_name":"StepVAR: Structure-Texture Guided Pruning for Visual Autoregressive Models","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164818465","doi":"https://doi.org/10.1145/3805622.3810814"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810814","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810814","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810814","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102872767","display_name":"Keli Liu","orcid":"https://orcid.org/0000-0003-0815-7224"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Keli Liu","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0009-0009-1915-0248","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338337","display_name":"Zi Wang","orcid":"https://orcid.org/0000-0002-7361-1406"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhendong Wang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0009-0005-7154-6478","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046805800","display_name":"Wengang Zhou","orcid":"https://orcid.org/0000-0003-1690-9836"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wengang Zhou","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0003-1690-9836","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078141810","display_name":"Houqiang Li","orcid":"https://orcid.org/0000-0003-2188-3028"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Houqiang Li","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0003-2188-3028","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93515804,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1626","last_page":"1634"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7416999936103821,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.7416999936103821,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.13529999554157257,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.03139999881386757,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6668999791145325},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.640500009059906},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5622000098228455},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5511000156402588},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.46129998564720154},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.459199994802475},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.3822999894618988},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.3750999867916107}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7601000070571899},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6668999791145325},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.640500009059906},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5622000098228455},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5519000291824341},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5511000156402588},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.46129998564720154},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.459199994802475},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3935000002384186},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3822999894618988},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3750999867916107},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.3628999888896942},{"id":"https://openalex.org/C50494287","wikidata":"https://www.wikidata.org/wiki/Q658467","display_name":"Texture synthesis","level":5,"score":0.35600000619888306},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.3555999994277954},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.29190000891685486},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28790000081062317},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.27239999175071716},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C4069607","wikidata":"https://www.wikidata.org/wiki/Q868732","display_name":"Aliasing","level":3,"score":0.2612999975681305},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810814","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810814","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810814","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810814","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W3180355996","https://openalex.org/W4312933868","https://openalex.org/W4399423212","https://openalex.org/W4402660110","https://openalex.org/W4402726956","https://openalex.org/W4402778510","https://openalex.org/W4411631887","https://openalex.org/W4413145894","https://openalex.org/W4413147266","https://openalex.org/W4415796345","https://openalex.org/W7133193464","https://openalex.org/W7133206201","https://openalex.org/W7133233895","https://openalex.org/W7160069725","https://openalex.org/W7160150027"],"related_works":[],"abstract_inverted_index":{"Visual":[0],"AutoRegressive":[1],"(VAR)":[2],"models":[3,165],"based":[4],"on":[5,54,159],"next-scale":[6,135],"prediction":[7,136],"enable":[8],"efficient":[9],"hierarchical":[10],"generation,":[11],"yet":[12],"the":[13,24,45,119],"inference":[14,84,171],"cost":[15],"grows":[16],"quadratically":[17],"at":[18],"high":[19],"resolutions.":[20],"We":[21],"observe":[22],"that":[23,43,81,167,183],"computationally":[25],"intensive":[26],"later":[27],"scales":[28,42],"predominantly":[29],"refine":[30],"high-frequency":[31,55],"textures":[32],"and":[33,64,89,129,162,178,193],"exhibit":[34],"substantial":[35,170],"spatial":[36],"redundancy,":[37],"in":[38],"contrast":[39],"to":[40,99,110,121,149],"earlier":[41],"determine":[44],"global":[46,67,112],"structural":[47,62,88,113],"layout.":[48],"Existing":[49],"pruning":[50,79],"methods":[51],"primarily":[52],"focus":[53],"detection":[56],"for":[57,125],"token":[58,78],"selection,":[59],"often":[60],"overlooking":[61],"coherence":[63],"consequently":[65],"degrading":[66],"semantics.":[68],"To":[69,132],"address":[70],"this":[71],"limitation,":[72],"we":[73,93,140],"propose":[74],"StepVAR,":[75],"a":[76,95,143],"training-free":[77],"framework":[80],"accelerates":[82],"VAR":[83,164,198],"by":[85],"jointly":[86],"considering":[87],"textural":[90],"importance.":[91],"Specifically,":[92],"employ":[94],"lightweight":[96],"high-pass":[97],"filter":[98],"capture":[100],"local":[101],"texture":[102],"details,":[103],"while":[104,173],"leveraging":[105],"Principal":[106],"Component":[107],"Analysis":[108],"(PCA)":[109],"preserve":[111],"information.":[114],"This":[115],"dual-criterion":[116],"design":[117],"enables":[118],"model":[120],"retain":[122],"tokens":[123],"critical":[124],"both":[126],"fine-grained":[127],"fidelity":[128],"overall":[130],"composition.":[131],"maintain":[133],"valid":[134],"under":[137],"sparse":[138],"tokens,":[139],"further":[141],"introduce":[142],"nearest":[144],"neighbor":[145],"feature":[146,152],"propagation":[147],"strategy":[148],"reconstruct":[150],"dense":[151],"maps":[153],"from":[154],"pruned":[155],"representations.":[156],"Extensive":[157],"experiments":[158],"state-of-the-art":[160],"text-to-image":[161],"text-to-video":[163],"demonstrate":[166],"StepVAR":[168],"achieves":[169],"speedups":[172],"maintaining":[174],"generation":[175],"quality.":[176],"Quantitative":[177],"qualitative":[179],"evaluations":[180],"consistently":[181],"show":[182],"our":[184],"method":[185],"outperforms":[186],"existing":[187],"acceleration":[188],"approaches,":[189],"validating":[190],"its":[191],"effectiveness":[192],"general":[194],"applicability":[195],"across":[196],"diverse":[197],"architectures.":[199]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
