{"id":"https://openalex.org/W7138010376","doi":"https://doi.org/10.1609/aaai.v40i15.38267","title":"Zo3T: Zero-Shot 3D-Aware Trajectory-Guided Image-to-Video Generation via Test-Time Training","display_name":"Zo3T: Zero-Shot 3D-Aware Trajectory-Guided Image-to-Video Generation via Test-Time Training","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138010376","doi":"https://doi.org/10.1609/aaai.v40i15.38267"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i15.38267","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i15.38267","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38267/42229","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38267/42229","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129686798","display_name":"Ruicheng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruicheng Zhang","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129736638","display_name":"Jun Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Zhou","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129704874","display_name":"Zunnan Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zunnan Xu","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129742906","display_name":"Zihao Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihao Liu","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076907782","display_name":"Jiehui Huang","orcid":"https://orcid.org/0000-0002-3099-2886"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Jiehui Huang","raw_affiliation_strings":["The Hong Kong University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129673129","display_name":"Mingyang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mingyang Zhang","raw_affiliation_strings":["China University of Geoscience"],"affiliations":[{"raw_affiliation_string":"China University of Geoscience","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129657135","display_name":"Yu Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Sun","raw_affiliation_strings":["Sun Yat-sen University"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129686104","display_name":"Xiu Li","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiu Li","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5129686798"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28731343,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"15","first_page":"12708","last_page":"12716"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3458999991416931,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3458999991416931,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.22660000622272491,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.1200999990105629,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5318999886512756},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5303999781608582},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.5105000138282776},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4934000074863434},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4887999892234802},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4528999924659729},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.4339999854564667},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4187999963760376},{"id":"https://openalex.org/keywords/kinematics","display_name":"Kinematics","score":0.41350001096725464}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7710999846458435},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5784000158309937},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5318999886512756},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5303999781608582},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.5105000138282776},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4934000074863434},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4887999892234802},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4528999924659729},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.4339999854564667},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4187999963760376},{"id":"https://openalex.org/C39920418","wikidata":"https://www.wikidata.org/wiki/Q11476","display_name":"Kinematics","level":2,"score":0.41350001096725464},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.41179999709129333},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4020000100135803},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39980000257492065},{"id":"https://openalex.org/C92757383","wikidata":"https://www.wikidata.org/wiki/Q382497","display_name":"Affine transformation","level":2,"score":0.38269999623298645},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3822000026702881},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.3635999858379364},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.35179999470710754},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.34630000591278076},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.33889999985694885},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.33559998869895935},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C145565327","wikidata":"https://www.wikidata.org/wiki/Q852514","display_name":"Motion control","level":3,"score":0.2874000072479248},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C50637493","wikidata":"https://www.wikidata.org/wiki/Q1136781","display_name":"Morphing","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i15.38267","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i15.38267","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38267/42229","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i15.38267","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i15.38267","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38267/42229","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138010376.pdf","grobid_xml":"https://content.openalex.org/works/W7138010376.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Trajectory-Guided":[0,101],"image-to-video":[1],"(I2V)":[2],"generation":[3,74],"aims":[4],"to":[5,10,31,90,140,146],"synthesize":[6],"videos":[7],"that":[8,106],"adhere":[9],"user-specified":[11],"motion":[12,42,133,194],"instructions.":[13],"Existing":[14],"methods":[15,29],"typically":[16],"rely":[17],"on":[18,22],"computationally":[19],"expensive":[20],"fine-tuning":[21],"scarce":[23],"annotated":[24],"datasets.":[25],"Although":[26],"some":[27],"zero-shot":[28,69,207],"attempt":[30],"trajectory":[32],"control":[33],"in":[34,196],"the":[35,52,56,115,119,137,147,165,171,185],"latent":[36,120],"space,":[37],"they":[38],"may":[39],"yield":[40],"unrealistic":[41],"by":[43,123,169],"neglecting":[44],"3D":[45,191],"perspective":[46],"and":[47,55,109,154,193,206],"creating":[48],"a":[49,67,82,104,124,176],"misalignment":[50],"between":[51],"manipulated":[53,148],"latents":[54],"network's":[57],"noise":[58],"predictions.":[59],"To":[60],"address":[61],"these":[62],"challenges,":[63],"we":[64,80,99,158],"introduce":[65,100],"Zo3T,":[66],"novel":[68],"test-time-training":[70],"framework":[71],"for":[72,95],"trajectory-guided":[73],"with":[75],"three":[76],"core":[77],"innovations:":[78],"First,":[79],"incorporate":[81],"3D-Aware":[83],"Kinematic":[84],"Projection,":[85],"leveraging":[86],"inferring":[87],"scene":[88],"depth":[89],"derive":[91],"perspective-correct":[92],"affine":[93],"transformations":[94],"target":[96,186],"regions.":[97],"Second,":[98],"Test-Time":[102],"LoRA,":[103],"mechanism":[105],"dynamically":[107],"injects":[108],"optimizes":[110],"ephemeral":[111],"LoRA":[112],"adapters":[113],"into":[114],"denoising":[116,166],"network":[117],"alongside":[118],"state.":[121],"Driven":[122],"regional":[125],"feature":[126],"consistency":[127],"loss,":[128],"this":[129],"co-adaptation":[130],"effectively":[131],"enforces":[132],"constraints":[134],"while":[135],"allowing":[136],"pre-trained":[138],"model":[139],"locally":[141],"adapt":[142],"its":[143],"internal":[144],"representations":[145],"latent,":[149],"thereby":[150],"ensuring":[151,180],"generative":[152,182],"fidelity":[153],"on-manifold":[155],"adherence.":[156],"Finally,":[157],"develop":[159],"Guidance":[160],"Field":[161],"Rectification,":[162],"which":[163],"refines":[164],"evolutionary":[167],"path":[168],"optimizing":[170],"conditional":[172],"guidance":[173],"field":[174],"through":[175],"one-step":[177],"lookahead":[178],"strategy,":[179],"efficient":[181],"progression":[183],"towards":[184],"trajectory.":[187],"Zo3T":[188],"significantly":[189],"enhances":[190],"realism":[192],"accuracy":[195],"trajectory-controlled":[197],"I2V":[198],"generation,":[199],"demonstrating":[200],"superior":[201],"performance":[202],"over":[203],"existing":[204],"training-based":[205],"approaches.":[208]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
