{"id":"https://openalex.org/W4415536923","doi":"https://doi.org/10.1145/3746027.3754956","title":"Gen4Track: A Tuning-free Data Augmentation Framework via Self-correcting Diffusion Model for Vision-Language Tracking","display_name":"Gen4Track: A Tuning-free Data Augmentation Framework via Self-correcting Diffusion Model for Vision-Language Tracking","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415536923","doi":"https://doi.org/10.1145/3746027.3754956"},"language":"en","primary_location":{"id":"doi:10.1145/3746027.3754956","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754956","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://pure.qub.ac.uk/en/publications/476c24e0-7a42-4558-aae4-8e87ff116db5","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018527272","display_name":"Jiawei Ge","orcid":"https://orcid.org/0000-0001-7268-7815"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiawei Ge","raw_affiliation_strings":["Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0001-7268-7815","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100769571","display_name":"Xinyu Zhang","orcid":"https://orcid.org/0000-0002-2838-1445"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyu Zhang","raw_affiliation_strings":["Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-2838-1445","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012384188","display_name":"Jiuxin Cao","orcid":"https://orcid.org/0000-0002-2448-6717"},"institutions":[{"id":"https://openalex.org/I4210155350","display_name":"Purple Mountain Laboratories","ror":"https://ror.org/04zcbk583","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210155350"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiuxin Cao","raw_affiliation_strings":["Southeast University, Nanjing, China and Purple Mountain Laboratories, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-2448-6717","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China and Purple Mountain Laboratories, Nanjing, China","institution_ids":["https://openalex.org/I4210155350"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076214561","display_name":"Xuelin Zhu","orcid":"https://orcid.org/0000-0001-7676-2843"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xuelin Zhu","raw_affiliation_strings":["The Hong Kong Polytechnic University, Hong Kong, China"],"raw_orcid":"https://orcid.org/0000-0001-7676-2843","affiliations":[{"raw_affiliation_string":"The Hong Kong Polytechnic University, Hong Kong, China","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100747522","display_name":"Weijia Liu","orcid":"https://orcid.org/0000-0003-2634-7283"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weijia Liu","raw_affiliation_strings":["Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-2634-7283","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014296277","display_name":"Qingqing Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingqing Gao","raw_affiliation_strings":["Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0007-9674-3624","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077125725","display_name":"Biwei Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Biwei Cao","raw_affiliation_strings":["Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-3375-404X","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042411401","display_name":"Kun Wang","orcid":"https://orcid.org/0000-0002-6735-7667"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Wang","raw_affiliation_strings":["Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-6735-7667","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chang Liu","orcid":"https://orcid.org/0009-0003-3865-2434"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang Liu","raw_affiliation_strings":["Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0003-3865-2434","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100461575","display_name":"Bo Liu","orcid":"https://orcid.org/0000-0001-5209-9063"},"institutions":[{"id":"https://openalex.org/I4210155350","display_name":"Purple Mountain Laboratories","ror":"https://ror.org/04zcbk583","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210155350"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Liu","raw_affiliation_strings":["Southeast University, Nanjing, China and Purple Mountain Laboratories, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0001-5209-9063","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China and Purple Mountain Laboratories, Nanjing, China","institution_ids":["https://openalex.org/I4210155350"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100352678","display_name":"Feng Chen","orcid":"https://orcid.org/0000-0001-9199-559X"},"institutions":[{"id":"https://openalex.org/I126231945","display_name":"Queen's University Belfast","ror":"https://ror.org/00hswnk62","country_code":"GB","type":"education","lineage":["https://openalex.org/I126231945"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chen Feng","raw_affiliation_strings":["University College London, London, United Kingdom and Queen's University Belfast, Belfast, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0001-9199-559X","affiliations":[{"raw_affiliation_string":"University College London, London, United Kingdom and Queen's University Belfast, Belfast, United Kingdom","institution_ids":["https://openalex.org/I126231945","https://openalex.org/I45129253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031205865","display_name":"Ioannis Patras","orcid":"https://orcid.org/0000-0003-3913-4738"},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ioannis Patras","raw_affiliation_strings":["Queen Mary University of London, London, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0003-3913-4738","affiliations":[{"raw_affiliation_string":"Queen Mary University of London, London, United Kingdom","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5018527272"],"corresponding_institution_ids":["https://openalex.org/I76569877"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28406726,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3037","last_page":"3046"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.9745000004768372,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9682999849319458,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4997999966144562},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.48350000381469727},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.4431000053882599},{"id":"https://openalex.org/keywords/rotation","display_name":"Rotation (mathematics)","score":0.4345000088214874},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.4341000020503998},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.40610000491142273},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.4056999981403351},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.39809998869895935}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.817300021648407},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.527400016784668},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4997999966144562},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.48350000381469727},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.4431000053882599},{"id":"https://openalex.org/C74050887","wikidata":"https://www.wikidata.org/wiki/Q848368","display_name":"Rotation (mathematics)","level":2,"score":0.4345000088214874},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.4341000020503998},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.40610000491142273},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.4056999981403351},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.39809998869895935},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3822000026702881},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.37770000100135803},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.3481999933719635},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.33980000019073486},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.3147999942302704},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30809998512268066},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27079999446868896},{"id":"https://openalex.org/C2777851325","wikidata":"https://www.wikidata.org/wiki/Q7094102","display_name":"Online model","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.25839999318122864},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.2515999972820282}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3746027.3754956","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754956","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.qub.ac.uk/portal:openaire/476c24e0-7a42-4558-aae4-8e87ff116db5","is_oa":true,"landing_page_url":"https://pure.qub.ac.uk/en/publications/476c24e0-7a42-4558-aae4-8e87ff116db5","pdf_url":null,"source":{"id":"https://openalex.org/S4306402319","display_name":"Research Portal (Queen's University Belfast)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I126231945","host_organization_name":"Queen's University Belfast","host_organization_lineage":["https://openalex.org/I126231945"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Ge, J, Zhang, X, Gao, Q, Zhu, X, Liu, W, Cao, B, Wang, K, Liu, C, Liu, B, Feng, C & Patras, I 2025, Gen4Track: a tuning-free data augmentation framework via self-correcting diffusion model for vision-language tracking. in Proceedings of the 33rd ACM International Conference on Multimedia (MM \u201925). Association for Computing Machinery, pp. 3037-3046, MM '25: 33rd ACM International Conference on Multimedia, Dublin, Ireland, 27/10/2025. https://doi.org/10.1145/3746027.3754956","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:pure.qub.ac.uk/portal:openaire/476c24e0-7a42-4558-aae4-8e87ff116db5","is_oa":true,"landing_page_url":"https://pure.qub.ac.uk/en/publications/476c24e0-7a42-4558-aae4-8e87ff116db5","pdf_url":null,"source":{"id":"https://openalex.org/S4306402319","display_name":"Research Portal (Queen's University Belfast)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I126231945","host_organization_name":"Queen's University Belfast","host_organization_lineage":["https://openalex.org/I126231945"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Ge, J, Zhang, X, Gao, Q, Zhu, X, Liu, W, Cao, B, Wang, K, Liu, C, Liu, B, Feng, C & Patras, I 2025, Gen4Track: a tuning-free data augmentation framework via self-correcting diffusion model for vision-language tracking. in Proceedings of the 33rd ACM International Conference on Multimedia (MM \u201925). Association for Computing Machinery, pp. 3037-3046, MM '25: 33rd ACM International Conference on Multimedia, Dublin, Ireland, 27/10/2025. https://doi.org/10.1145/3746027.3754956","raw_type":"info:eu-repo/semantics/conferenceObject"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1067237025","display_name":null,"funder_award_id":"No. 62472092, No. 62172089, No. 62106045","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3382621848","display_name":null,"funder_award_id":"No. BK20241751","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1917989004","https://openalex.org/W2747053578","https://openalex.org/W2891033863","https://openalex.org/W2946245424","https://openalex.org/W2963074722","https://openalex.org/W2963093735","https://openalex.org/W2963109634","https://openalex.org/W3106542916","https://openalex.org/W3136789123","https://openalex.org/W3176709420","https://openalex.org/W3181069167","https://openalex.org/W3212516020","https://openalex.org/W4312933868","https://openalex.org/W4385537492","https://openalex.org/W4385801014","https://openalex.org/W4386075643","https://openalex.org/W4386113271","https://openalex.org/W4390796926","https://openalex.org/W4390872387","https://openalex.org/W4390873054","https://openalex.org/W4390873135","https://openalex.org/W4390874143","https://openalex.org/W4390874575","https://openalex.org/W4393148714","https://openalex.org/W4394593136","https://openalex.org/W4394625750","https://openalex.org/W4399310901","https://openalex.org/W4399852316","https://openalex.org/W4402502610","https://openalex.org/W4402716132","https://openalex.org/W4404782964","https://openalex.org/W4408930345","https://openalex.org/W4410118623"],"related_works":[],"abstract_inverted_index":{"The":[0],"performance":[1,232],"of":[2,16,74,89,204,233,239,253],"current":[3],"Vision-Language":[4,255],"Tracking":[5],"(VLT)":[6],"models":[7,69,236],"is":[8],"constrained":[9],"by":[10,33],"the":[11,72,86,108,120,136,198,202,211,231],"limited":[12],"diversity":[13],"and":[14,51,62,102,158,160,184,194,245],"quantity":[15],"labeled":[17],"data.":[18,264],"Compared":[19],"to":[20,84,123,145,173,241],"constructing":[21],"large-scale":[22],"datasets,":[23],"data":[24,78,90,115,128],"augmentation":[25,91,116,212],"offers":[26],"a":[27,113,140,163,168,220,250],"more":[28,221],"cost-saving":[29],"strategy":[30],"for":[31,77,219],"VLT":[32,93,235],"synthesizing":[34],"new":[35,251],"samples":[36],"from":[37,44],"existing":[38,81],"data,":[39],"rather":[40,260],"than":[41,261],"generating":[42],"them":[43],"scratch.":[45],"However,":[46],"conventional":[47],"techniques":[48],"like":[49],"rotation":[50],"flipping":[52],"may":[53],"disrupt":[54],"scene":[55],"composition,":[56],"causing":[57],"conflicts":[58],"between":[59],"visual":[60],"layouts":[61],"textual":[63],"annotations.":[64,130],"Recent":[65],"advances":[66],"in":[67,92,95,139,243,247],"generative":[68],"have":[70],"inspired":[71],"use":[73],"synthetic":[75,258],"videos":[76,148,259],"augmentation.":[79],"Yet,":[80],"approaches":[82],"fail":[83],"address":[85],"core":[87],"concerns":[88],"(shown":[94],"Fig.":[96],"1)-target":[97],"location":[98],"accuracy,":[99],"text-video":[100,175,206],"consistency,":[101,207],"video":[103,127,178],"content":[104],"coherency.":[105],"To":[106],"bridge":[107],"gap,":[109],"we":[110,180,208],"propose":[111,181],"Gen4Track,":[112],"tuning-free":[114],"framework":[117],"that":[118,149,189,227],"leverages":[119],"self-correcting":[121,164,217],"mechanism":[122,165,218],"dynamically":[124],"generate":[125],"high-quality":[126],"with":[129,201,214,257],"Our":[131],"approach":[132],"involves":[133],"(1)":[134],"optimizing":[135],"attention":[137],"calculations":[138],"frozen":[141],"text-to-image":[142],"diffusion":[143],"model":[144],"synthesize":[146],"coherent":[147],"satisfy":[150],"specific":[151],"conditions":[152],"(e.g.,":[153],"spatial":[154],"location,":[155],"category,":[156],"color,":[157],"style),":[159],"(2)":[161],"implementing":[162],"based":[166],"on":[167],"Large":[169],"Language":[170],"Model":[171],"(LLM)":[172],"improve":[174],"consistency.":[176],"During":[177],"augmentation,":[179],"content-coherent":[182],"self-attention":[183],"location-enhanced":[185],"cross-attention":[186],"mechanisms,":[187],"ensuring":[188],"image-level":[190],"editings":[191],"are":[192],"accurately":[193],"coherently":[195],"propagated":[196],"throughout":[197],"video.":[199,223],"Then,":[200],"goal":[203],"maximizing":[205],"iteratively":[209],"refine":[210],"instruction":[213],"our":[215],"designed":[216],"aligned":[222],"Extensive":[224],"experiments":[225],"validate":[226],"Gen4Track":[228],"significantly":[229],"boosts":[230],"SOTA":[234],"(achieving":[237],"improvements":[238],"up":[240],"3.2%":[242],"SUC":[244],"3.5%":[246],"PRE),":[248],"opening":[249],"chapter":[252],"training":[254],"trackers":[256],"manually":[262],"annotated":[263]},"counts_by_year":[],"updated_date":"2026-04-30T09:15:22.047038","created_date":"2025-10-25T00:00:00"}
