{"id":"https://openalex.org/W4403791497","doi":"https://doi.org/10.1145/3664647.3680701","title":"GPD-VVTO: Preserving Garment Details in Video Virtual Try-On","display_name":"GPD-VVTO: Preserving Garment Details in Video Virtual Try-On","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791497","doi":"https://doi.org/10.1145/3664647.3680701"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3680701","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680701","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012522719","display_name":"Yuanbin Wang","orcid":"https://orcid.org/0009-0009-1238-1692"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuanbin Wang","raw_affiliation_strings":["School of Artificial Intelligence, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101363196","display_name":"Weilun Dai","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weilun Dai","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111425337","display_name":"L.W. Chan","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Chan","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Huanyu Zhou","orcid":"https://orcid.org/0009-0008-7839-3154"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huanyu Zhou","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049610245","display_name":"Aixi Zhang","orcid":"https://orcid.org/0000-0001-9863-0091"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Aixi Zhang","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100330138","display_name":"Si Liu","orcid":"https://orcid.org/0000-0002-9180-2935"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Si Liu","raw_affiliation_strings":["School of Artificial Intelligence, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5012522719"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":0.7326,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.72684326,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"7133","last_page":"7142"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.740308940410614},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.6763320565223694},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4232550263404846},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36407557129859924},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.32520633935928345}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.740308940410614},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.6763320565223694},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4232550263404846},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36407557129859924},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.32520633935928345}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3680701","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3680701","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2331128040","https://openalex.org/W2962785568","https://openalex.org/W2963524571","https://openalex.org/W4292828922","https://openalex.org/W4312933868","https://openalex.org/W4386083141","https://openalex.org/W4387968172","https://openalex.org/W4388191297","https://openalex.org/W4390190334"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Video":[0,121],"Virtual":[1,122],"Try-On":[2,123],"aims":[3],"to":[4,27,31,35,136,162,188,211,215],"transfer":[5],"a":[6,9,108,168,194,224],"garment":[7,94,157,164,173,203,217],"onto":[8],"person":[10],"in":[11,41,242,260],"the":[12,62,86,89,93,98,126,129,138,156,159,172,175,178,181,209,213,216,236,269],"video.":[13,100],"Previous":[14],"methods":[15,26,259],"typically":[16],"focus":[17],"on":[18,113,133,185],"image-based":[19,262],"virtual":[20,43,66,226,265],"try-on,":[21],"but":[22],"directly":[23],"applying":[24],"these":[25,103],"videos":[28,231],"often":[29],"leads":[30],"temporal":[32,190,206,219],"discontinuity":[33],"due":[34],"inconsistencies":[36],"between":[37,171],"frames.":[38],"Limited":[39],"attempts":[40],"video":[42,65,186,225,245],"try-on":[44,67,143,227,266],"also":[45],"suffer":[46],"from":[47,232],"unrealistic":[48],"results":[49,77],"and":[50,80,92,151,174,247,263],"poor":[51],"generalization":[52],"ability.":[53],"In":[54,125,177],"light":[55],"of":[56,64,140,155,239,244,271],"previous":[57],"research,":[58],"we":[59,106,222],"posit":[60],"that":[61,201,253],"task":[63],"can":[68],"be":[69],"decomposed":[70],"into":[71,158,205],"two":[72,104],"key":[73],"aspects:":[74],"(1)":[75],"single-frame":[76,134],"are":[78,95],"realistic":[79],"natural,":[81],"while":[82,166],"retaining":[83],"consistency":[84],"with":[85],"garment;":[87],"(2)":[88],"person's":[90],"actions":[91],"coherent":[96],"throughout":[97],"entire":[99],"To":[101],"address":[102],"aspects,":[105],"propose":[107],"novel":[109,195],"two-stage":[110],"framework":[111],"based":[112],"Latent":[114],"Diffusion":[115,119],"Model,":[116],"namely":[117],"Garment-Preserving":[118],"for":[120],"(GPD-VVTO).":[124],"first":[127],"stage,":[128,180],"model":[130,182,210],"is":[131,183],"trained":[132,184],"data":[135,187],"improve":[137],"ability":[139],"generating":[141],"high-quality":[142],"images.":[144],"We":[145,192],"integrate":[146],"both":[147,261],"low-level":[148],"texture":[149],"features":[150,154,204],"high-level":[152],"semantic":[153],"denoising":[160],"network":[161],"preserve":[163],"details":[165],"ensuring":[167],"natural":[169],"fit":[170],"person.":[176],"second":[179],"enhance":[189],"consistency.":[191],"devise":[193],"Garment-aware":[196],"Temporal":[197],"Attention":[198],"(GTA)":[199],"module":[200],"incorporates":[202],"attention,":[207],"enabling":[208],"maintain":[212],"fidelity":[214],"during":[218],"modeling.":[220],"Furthermore,":[221],"collect":[223],"dataset":[228],"containing":[229],"high-resolution":[230],"diverse":[233],"scenes,":[234],"addressing":[235],"limited":[237],"variety":[238],"current":[240],"datasets":[241],"terms":[243],"background":[246],"human":[248],"actions.":[249],"Extensive":[250],"experiments":[251],"demonstrate":[252],"our":[254,272],"method":[255],"outperforms":[256],"existing":[257],"state-of-the-art":[258],"video-based":[264],"tasks,":[267],"indicating":[268],"effectiveness":[270],"proposed":[273],"framework.":[274]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-11T08:14:18.477133","created_date":"2025-10-10T00:00:00"}
