{"id":"https://openalex.org/W4410313676","doi":"https://doi.org/10.1109/iccv51701.2025.01782","title":"Fine-Tuning Visual Autoregressive Models for Subject-Driven Generation","display_name":"Fine-Tuning Visual Autoregressive Models for Subject-Driven Generation","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4410313676","doi":"https://doi.org/10.1109/iccv51701.2025.01782"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.01782","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01782","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2504.02612","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104143036","display_name":"Jiwoo Chung","orcid":null},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jiwoo Chung","raw_affiliation_strings":["Sungkyunkwan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057187648","display_name":"Sangeek Hyun","orcid":"https://orcid.org/0000-0002-4050-6896"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sangeek Hyun","raw_affiliation_strings":["Sungkyunkwan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102859295","display_name":"Hyunjun Kim","orcid":"https://orcid.org/0009-0001-3539-2679"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyunjun Kim","raw_affiliation_strings":["Sungkyunkwan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044234730","display_name":"Eun Hee Koh","orcid":"https://orcid.org/0000-0003-3829-0384"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Eunseo Koh","raw_affiliation_strings":["Sungkyunkwan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114114639","display_name":"MinKyu Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"MinKyu Lee","raw_affiliation_strings":["Sungkyunkwan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029469141","display_name":"Jae\u2010Pil Heo","orcid":"https://orcid.org/0000-0001-9684-7641"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jae-Pil Heo","raw_affiliation_strings":["Sungkyunkwan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University","institution_ids":["https://openalex.org/I848706"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5104143036"],"corresponding_institution_ids":["https://openalex.org/I848706"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07710434,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"19174","last_page":"19184"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9380999803543091,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.8416776657104492},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5217419862747192},{"id":"https://openalex.org/keywords/subject","display_name":"Subject (documents)","score":0.48759523034095764},{"id":"https://openalex.org/keywords/star-model","display_name":"STAR model","score":0.4597932994365692},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.3469381332397461},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3379659652709961},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22225934267044067},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2062038779258728},{"id":"https://openalex.org/keywords/autoregressive-integrated-moving-average","display_name":"Autoregressive integrated moving average","score":0.18299242854118347},{"id":"https://openalex.org/keywords/time-series","display_name":"Time series","score":0.1615748107433319}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.8416776657104492},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5217419862747192},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.48759523034095764},{"id":"https://openalex.org/C194657046","wikidata":"https://www.wikidata.org/wiki/Q7394685","display_name":"STAR model","level":4,"score":0.4597932994365692},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3469381332397461},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3379659652709961},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22225934267044067},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2062038779258728},{"id":"https://openalex.org/C24338571","wikidata":"https://www.wikidata.org/wiki/Q2566298","display_name":"Autoregressive integrated moving average","level":3,"score":0.18299242854118347},{"id":"https://openalex.org/C151406439","wikidata":"https://www.wikidata.org/wiki/Q186588","display_name":"Time series","level":2,"score":0.1615748107433319},{"id":"https://openalex.org/C161191863","wikidata":"https://www.wikidata.org/wiki/Q199655","display_name":"Library science","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.01782","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01782","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2504.02612","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.02612","pdf_url":"https://arxiv.org/pdf/2504.02612","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2504.02612","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2504.02612","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2504.02612","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.02612","pdf_url":"https://arxiv.org/pdf/2504.02612","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4410313676.pdf","grobid_xml":"https://content.openalex.org/works/W4410313676.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2439807930","https://openalex.org/W2009692134","https://openalex.org/W1972271943","https://openalex.org/W2019155478","https://openalex.org/W2024529895","https://openalex.org/W2168175994","https://openalex.org/W1902630399","https://openalex.org/W2120434453","https://openalex.org/W3120578569","https://openalex.org/W1487412319"],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,38],"text-to-image":[3],"generative":[4],"models":[5,17,29],"have":[6,117],"enabled":[7],"numerous":[8],"practical":[9,64,178],"applications,":[10],"including":[11],"subject-driven":[12,76],"generation,":[13],"which":[14,49,130,144],"fine-tunes":[15],"pretrained":[16],"to":[18,83,100,106,152],"capture":[19],"subject":[20,125],"semantics":[21],"from":[22],"only":[23],"a":[24,118],"few":[25],"examples.":[26],"While":[27],"diffusion-based":[28,170],"produce":[30],"high-quality":[31],"images,":[32],"their":[33],"extensive":[34],"denoising":[35],"steps":[36],"result":[37],"significant":[39],"computational":[40,84],"overhead,":[41,85],"limiting":[42],"real-world":[43],"applicability.":[44],"Visual":[45],"autoregressive":[46],"(VAR)":[47],"models,":[48],"predict":[50],"next-scale":[51],"tokens":[52],"rather":[53],"than":[54,126],"spatially":[55],"adjacent":[56],"ones,":[57],"offer":[58],"significantly":[59,168],"faster":[60],"inference":[61],"suitable":[62],"for":[63,75,148],"deployment.":[65],"In":[66],"this":[67,137],"paper,":[68],"we":[69,95,111,139],"propose":[70,140],"the":[71,114,122,127,150,155],"first":[72],"VAR-based":[73],"approach":[74],"generation.":[77],"However,":[78],"naive":[79],"fine-tuning":[80],"VAR":[81],"leads":[82],"language":[86,108],"drift,":[87],"and":[88,103,175],"reduced":[89],"diversity.":[90],"To":[91],"address":[92],"these":[93],"challenges,":[94],"introduce":[96],"selective":[97],"layer":[98],"tuning":[99],"reduce":[101],"complexity":[102],"prior":[104],"distillation":[105],"mitigate":[107],"drift.":[109],"Additionally,":[110],"found":[112],"that":[113,165],"early":[115],"stages":[116],"greater":[119],"influence":[120],"on":[121,136,154],"generation":[123],"of":[124,159],"latter":[128],"stages,":[129],"merely":[131],"synthesize":[132],"minor":[133],"details.":[134,161],"Based":[135],"finding,":[138],"scale-wise":[141],"weighted":[142],"tuning,":[143],"prioritizes":[145],"coarser":[146],"resolutions":[147],"promoting":[149],"model":[151],"focus":[153],"subject-relevant":[156],"information":[157],"instead":[158],"local":[160],"Extensive":[162],"experiments":[163],"validate":[164],"our":[166],"method":[167],"outperforms":[169],"baselines":[171],"across":[172],"various":[173],"metrics":[174],"demonstrates":[176],"its":[177],"usage.":[179]},"counts_by_year":[],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
