{"id":"https://openalex.org/W7164808897","doi":"https://doi.org/10.1145/3805622.3810600","title":"Toward Generation-Centric Coding: Compressing Latents representation for TI2V Synthesis","display_name":"Toward Generation-Centric Coding: Compressing Latents representation for TI2V Synthesis","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164808897","doi":"https://doi.org/10.1145/3805622.3810600"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810600","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810600","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810600","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062975995","display_name":"Jianran Liu","orcid":"https://orcid.org/0000-0002-5835-7913"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianran Liu","raw_affiliation_strings":["State Key Lab of Processors, Institute of Computing Technology, CAS, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-5835-7913","affiliations":[{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, CAS, Beijing, China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042941682","display_name":"Wen Ji","orcid":"https://orcid.org/0000-0001-6895-3404"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Ji","raw_affiliation_strings":["State Key Lab of Processors, Institute of Computing Technology, CAS, Beijing, China and Institute of AI for Industries, CAS, Nanjing, Jiangsu, China"],"raw_orcid":"https://orcid.org/0000-0001-6895-3404","affiliations":[{"raw_affiliation_string":"State Key Lab of Processors, Institute of Computing Technology, CAS, Beijing, China and Institute of AI for Industries, CAS, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033015800","display_name":"Xiaokai Meng","orcid":"https://orcid.org/0000-0003-1348-1168"},"institutions":[{"id":"https://openalex.org/I1335486098","display_name":"Electric Power Research Institute","ror":"https://ror.org/02dqztz06","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1335486098"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaokai Meng","raw_affiliation_strings":["Electric Power Research Institute, State Grid Shanxi Electric Power Co., Ltd, Taiyuan, Shanxi, China"],"raw_orcid":"https://orcid.org/0000-0003-1348-1168","affiliations":[{"raw_affiliation_string":"Electric Power Research Institute, State Grid Shanxi Electric Power Co., Ltd, Taiyuan, Shanxi, China","institution_ids":["https://openalex.org/I1335486098"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083360053","display_name":"Wancai Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210118629","display_name":"NARI Group (China)","ror":"https://ror.org/02egn3136","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210118629"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wancai Zhang","raw_affiliation_strings":["NARI Technology Co., Ltd., Nanjing, Jiangsu, China"],"raw_orcid":"https://orcid.org/0009-0003-8943-8139","affiliations":[{"raw_affiliation_string":"NARI Technology Co., Ltd., Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I4210118629"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100347261","display_name":"Ying Wang","orcid":"https://orcid.org/0000-0003-4524-1812"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ying Wang","raw_affiliation_strings":["LonganPi Intelligent Information Technology Co., Ltd., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-8022-6685","affiliations":[{"raw_affiliation_string":"LonganPi Intelligent Information Technology Co., Ltd., Beijing, China","institution_ids":["https://openalex.org/I4210153682"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.9340042,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1870","last_page":"1878"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3034000098705292,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3034000098705292,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.2345999926328659,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.18930000066757202,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.6970999836921692},{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.5965999960899353},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.49070000648498535},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.47870001196861267},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.4675000011920929},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.45559999346733093},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4374000132083893},{"id":"https://openalex.org/keywords/rate\u2013distortion-theory","display_name":"Rate\u2013distortion theory","score":0.41659998893737793},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.4050999879837036},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.4009999930858612}],"concepts":[{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.6970999836921692},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6448000073432922},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.5965999960899353},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5544999837875366},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.49070000648498535},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.47870001196861267},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4767000079154968},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.4675000011920929},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.45559999346733093},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4374000132083893},{"id":"https://openalex.org/C64185310","wikidata":"https://www.wikidata.org/wiki/Q843483","display_name":"Rate\u2013distortion theory","level":3,"score":0.41659998893737793},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.4050999879837036},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.4009999930858612},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.39079999923706055},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.36419999599456787},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.33719998598098755},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.33570000529289246},{"id":"https://openalex.org/C17231256","wikidata":"https://www.wikidata.org/wiki/Q5156540","display_name":"Completeness (order theory)","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.32280001044273376},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C169805256","wikidata":"https://www.wikidata.org/wiki/Q1361381","display_name":"Transform coding","level":4,"score":0.3125999867916107},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.31119999289512634},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.29280000925064087},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.29170000553131104},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C94835093","wikidata":"https://www.wikidata.org/wiki/Q3113333","display_name":"Data compression ratio","level":5,"score":0.28630000352859497},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.2700999975204468},{"id":"https://openalex.org/C132094186","wikidata":"https://www.wikidata.org/wiki/Q641585","display_name":"Clutter","level":3,"score":0.2696000039577484},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2669999897480011},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.258899986743927},{"id":"https://openalex.org/C2987812609","wikidata":"https://www.wikidata.org/wiki/Q194158","display_name":"Bit rate","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.25360000133514404},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810600","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810600","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810600","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810600","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1580389772","https://openalex.org/W1861492603","https://openalex.org/W2962785568","https://openalex.org/W3034469748","https://openalex.org/W3090720321","https://openalex.org/W3113374047","https://openalex.org/W3173749647","https://openalex.org/W3180355996","https://openalex.org/W3202918664","https://openalex.org/W4223425316","https://openalex.org/W4304465124","https://openalex.org/W4312806968","https://openalex.org/W4316660599","https://openalex.org/W4367146862","https://openalex.org/W4380303706","https://openalex.org/W4386075611","https://openalex.org/W4393148714","https://openalex.org/W4402727980","https://openalex.org/W4402753569","https://openalex.org/W4404527320","https://openalex.org/W4408884279","https://openalex.org/W4409242344","https://openalex.org/W4409365504","https://openalex.org/W4409366751","https://openalex.org/W4413144436","https://openalex.org/W4415795864","https://openalex.org/W4415800779","https://openalex.org/W7116062398","https://openalex.org/W7133196311"],"related_works":[],"abstract_inverted_index":{"Text-Image-to-Video":[0],"(TI2V)":[1],"generation":[2],"can":[3],"be":[4,17],"sensitive":[5],"to":[6,23,82,119],"compression":[7],"distortions":[8],"in":[9,50],"the":[10,46,51,115],"reference":[11],"image.":[12],"Small":[13],"pixel-space":[14],"artifacts":[15],"may":[16],"amplified":[18],"through":[19],"iterative":[20],"generation,":[21],"leading":[22],"temporal":[24,102],"inconsistency":[25],"and":[26,75,79,104,110],"semantic":[27],"drift.":[28],"One":[29],"contributing":[30],"factor":[31],"is":[32],"that":[33,66,92],"conventional":[34],"codecs":[35],"are":[36],"optimized":[37],"for":[38,45,107],"human-oriented":[39],"pixel":[40],"fidelity,":[41],"without":[42],"explicitly":[43],"accounting":[44],"distribution":[47],"shift":[48],"induced":[49],"generator\u2019s":[52],"conditioning":[53,84],"latents.":[54],"Motivated":[55],"by":[56],"this":[57],"mismatch,":[58],"we":[59],"propose":[60],"a":[61],"generation-centric":[62],"latent-domain":[63],"coding":[64],"framework":[65],"compresses":[67],"generator-facing":[68],"deep":[69],"latents":[70],"instead":[71],"of":[72],"raw":[73],"pixels,":[74],"jointly":[76],"optimizes":[77],"rate":[78],"latent-space":[80],"distortion":[81,96],"preserve":[83],"statistics.":[85],"Experiments":[86],"across":[87],"four":[88],"bitrate":[89],"levels":[90],"show":[91],"our":[93],"approach":[94],"mitigates":[95],"accumulation":[97],"over":[98],"iterations.":[99],"It":[100],"improves":[101],"coherence":[103],"content":[105],"fidelity":[106],"both":[108],"short":[109],"extended":[111],"sequences,":[112],"consistently":[113],"reducing":[114],"pixel-level":[116],"discrepancy":[117],"relative":[118],"videos":[120],"generated":[121],"with":[122],"lossless":[123],"conditioning.":[124]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
