{"id":"https://openalex.org/W4414578664","doi":"https://doi.org/10.1109/iccv51701.2025.01715","title":"REGEN: Learning Compact Video Embedding with (Re-)Generative Decoder","display_name":"REGEN: Learning Compact Video Embedding with (Re-)Generative Decoder","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4414578664","doi":"https://doi.org/10.1109/iccv51701.2025.01715"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.01715","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01715","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2503.08665","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115844310","display_name":"Yitian Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yitian Zhang","raw_affiliation_strings":["Adobe Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119049746","display_name":"Long Mai","orcid":null},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Long Mai","raw_affiliation_strings":["Adobe Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Aniruddha Mahapatra","orcid":null},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aniruddha Mahapatra","raw_affiliation_strings":["Adobe Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001401596","display_name":"David Bourgin","orcid":"https://orcid.org/0000-0003-1039-6195"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Bourgin","raw_affiliation_strings":["Adobe Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036105537","display_name":"Yicong Hong","orcid":"https://orcid.org/0000-0002-5068-1508"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yicong Hong","raw_affiliation_strings":["Adobe Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014125577","display_name":"Jonah Casebeer","orcid":"https://orcid.org/0000-0002-8741-9773"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonah Casebeer","raw_affiliation_strings":["Adobe Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100415287","display_name":"Feng Liu","orcid":"https://orcid.org/0000-0002-5399-6214"},"institutions":[{"id":"https://openalex.org/I1306409833","display_name":"Adobe Systems (United States)","ror":"https://ror.org/059tvcg64","country_code":"US","type":"company","lineage":["https://openalex.org/I1306409833"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Feng Liu","raw_affiliation_strings":["Adobe Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Adobe Research","institution_ids":["https://openalex.org/I1306409833"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005819096","display_name":"Yun Fu","orcid":"https://orcid.org/0000-0002-5098-2853"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Yun Fu","raw_affiliation_strings":["Northeastern University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.22182073,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"18453","last_page":"18462"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9664999842643738,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9664999842643738,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9373999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.904699981212616,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6662999987602234},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6159999966621399},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.4505000114440918},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4156999886035919},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.3901999890804291},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.38929998874664307},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.38019999861717224},{"id":"https://openalex.org/keywords/video-quality","display_name":"Video quality","score":0.36410000920295715}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7804999947547913},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6662999987602234},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6159999966621399},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49729999899864197},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4505000114440918},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.42399999499320984},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4156999886035919},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3901999890804291},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.38929998874664307},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.38019999861717224},{"id":"https://openalex.org/C103910844","wikidata":"https://www.wikidata.org/wiki/Q2631256","display_name":"Video quality","level":3,"score":0.36410000920295715},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.3515999913215637},{"id":"https://openalex.org/C106030495","wikidata":"https://www.wikidata.org/wiki/Q1797012","display_name":"Video compression picture types","level":4,"score":0.34700000286102295},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3368000090122223},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.32910001277923584},{"id":"https://openalex.org/C150817343","wikidata":"https://www.wikidata.org/wiki/Q875932","display_name":"Digital watermarking","level":3,"score":0.32910001277923584},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C57654395","wikidata":"https://www.wikidata.org/wiki/Q1097775","display_name":"Compression artifact","level":5,"score":0.3122999966144562},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.01715","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01715","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2503.08665","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.08665","pdf_url":"https://arxiv.org/pdf/2503.08665","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2503.08665","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.08665","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2503.08665","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.08665","pdf_url":"https://arxiv.org/pdf/2503.08665","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,64,73,80,131,158],"novel":[3],"perspective":[4],"on":[5,27,90],"learning":[6],"video":[7,56,93,128,143],"embedders":[8,129],"for":[9,154],"generative":[10,47],"modeling:":[11],"rather":[12],"than":[13,141],"requiring":[14],"an":[15,19,22,59],"exact":[16],"reproduction":[17],"of":[18,45,120,135,149],"input":[20],"video,":[21],"effective":[23],"embedder":[24,57],"should":[25],"focus":[26],"synthesizing":[28],"visually":[29],"plausible":[30],"reconstructions.":[31],"This":[32],"relaxed":[33],"criterion":[34],"enables":[35,102],"substantial":[36],"improvements":[37],"in":[38,162],"compression":[39,113,133],"ratios":[40],"without":[41],"compromising":[42],"the":[43,53,87,91,112,118,147],"quality":[44],"downstream":[46],"models.":[48],"Specifically,":[49],"we":[50,78,123],"propose":[51],"replacing":[52],"conventional":[54],"encoder-decoder":[55],"with":[58],"encoder-generator":[60],"framework":[61],"that":[62,99],"employs":[63],"diffusion":[65,164],"transformer":[66],"(DiT)":[67],"to":[68,85,107,137],"synthesize":[69],"missing":[70],"details":[71],"from":[72,126],"compact":[74],"latent":[75,82,94,152,163],"space.":[76],"Therein,":[77],"develop":[79],"dedicated":[81],"conditioning":[83],"module":[84],"condition":[86],"DiT":[88],"decoder":[89],"encoded":[92],"embedding.":[95],"Our":[96],"experiments":[97],"demonstrate":[98,117],"our":[100,121,127],"approach":[101],"superior":[103],"encoding-decoding":[104],"performance":[105],"compared":[106],"state-of-the-art":[108],"methods,":[109],"particularly":[110],"as":[111],"ratio":[114,134],"increases.":[115],"To":[116],"efficacy":[119],"approach,":[122],"report":[124],"results":[125],"achieving":[130],"temporal":[132],"up":[136],"32x":[138],"(8x":[139],"higher":[140],"leading":[142],"embedders)":[144],"and":[145,167],"validate":[146],"robustness":[148],"this":[150],"ultra-compact":[151],"space":[153],"text-to-video":[155],"generation,":[156],"providing":[157],"significant":[159],"efficiency":[160],"boost":[161],"model":[165],"training":[166],"inference.":[168]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
