{"id":"https://openalex.org/W4361856613","doi":"https://doi.org/10.1109/tmm.2023.3262972","title":"Controllable Video Generation With Text-Based Instructions","display_name":"Controllable Video Generation With Text-Based Instructions","publication_year":2023,"publication_date":"2023-03-29","ids":{"openalex":"https://openalex.org/W4361856613","doi":"https://doi.org/10.1109/tmm.2023.3262972"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2023.3262972","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2023.3262972","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022872060","display_name":"Ali K\u00f6ksal","orcid":"https://orcid.org/0000-0001-8966-592X"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Ali K\u00f6ksal","raw_affiliation_strings":["Department of Visual Intelligence, Institute for Infocomm Research, A*STAR, Singapore","School of Computer Science and Engineering (SCSE), Nanyang Technological University (NTU), Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Visual Intelligence, Institute for Infocomm Research, A*STAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]},{"raw_affiliation_string":"School of Computer Science and Engineering (SCSE), Nanyang Technological University (NTU), Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073743493","display_name":"Kenan E. Ak","orcid":"https://orcid.org/0000-0001-5863-3685"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Kenan E. Ak","raw_affiliation_strings":["Department of Visual Intelligence, Institute for Infocomm Research, A*STAR, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Visual Intelligence, Institute for Infocomm Research, A*STAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100649794","display_name":"Ying Sun","orcid":"https://orcid.org/0000-0002-7224-6726"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ying Sun","raw_affiliation_strings":["Department of Visual Intelligence, Institute for Infocomm Research, A*STAR, Singapore","Centre for Frontier AI Research, A*STAR, Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Visual Intelligence, Institute for Infocomm Research, A*STAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Centre for Frontier AI Research, A*STAR, Singapore","institution_ids":["https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009372982","display_name":"Deepu Rajan","orcid":"https://orcid.org/0000-0001-7788-8368"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Deepu Rajan","raw_affiliation_strings":["School of Computer Science and Engineering (SCSE), Nanyang Technological University (NTU), Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering (SCSE), Nanyang Technological University (NTU), Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077258552","display_name":"Joo\u2010Hwee Lim","orcid":"https://orcid.org/0000-0002-4103-3824"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Joo Hwee Lim","raw_affiliation_strings":["Department of Visual Intelligence, Institute for Infocomm Research, A*STAR, Singapore","Centre for Frontier AI Research, A*STAR, Singapore","School of Computer Science and Engineering (SCSE), Nanyang Technological University (NTU), Singapore"],"affiliations":[{"raw_affiliation_string":"Department of Visual Intelligence, Institute for Infocomm Research, A*STAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]},{"raw_affiliation_string":"Centre for Frontier AI Research, A*STAR, Singapore","institution_ids":["https://openalex.org/I115228651"]},{"raw_affiliation_string":"School of Computer Science and Engineering (SCSE), Nanyang Technological University (NTU), Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5022872060"],"corresponding_institution_ids":["https://openalex.org/I115228651","https://openalex.org/I172675005","https://openalex.org/I3005327000"],"apc_list":null,"apc_paid":null,"fwci":1.6851,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86092949,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"26","issue":null,"first_page":"190","last_page":"201"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8649301528930664},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6034725308418274},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5942291021347046},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5358250141143799},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.533584475517273},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5244598388671875},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.5078142285346985},{"id":"https://openalex.org/keywords/motion-estimation","display_name":"Motion estimation","score":0.5060190558433533},{"id":"https://openalex.org/keywords/motion-control","display_name":"Motion control","score":0.49274805188179016},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.48485124111175537},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4381397068500519},{"id":"https://openalex.org/keywords/robotic-arm","display_name":"Robotic arm","score":0.42757248878479004},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.4259370267391205},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.41846412420272827},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4159296751022339}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8649301528930664},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6034725308418274},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5942291021347046},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5358250141143799},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.533584475517273},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5244598388671875},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.5078142285346985},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.5060190558433533},{"id":"https://openalex.org/C145565327","wikidata":"https://www.wikidata.org/wiki/Q852514","display_name":"Motion control","level":3,"score":0.49274805188179016},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.48485124111175537},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4381397068500519},{"id":"https://openalex.org/C150415221","wikidata":"https://www.wikidata.org/wiki/Q40687","display_name":"Robotic arm","level":2,"score":0.42757248878479004},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.4259370267391205},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.41846412420272827},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4159296751022339},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2023.3262972","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2023.3262972","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7300000190734863,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":81,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2133665775","https://openalex.org/W2593414223","https://openalex.org/W2799055999","https://openalex.org/W2884460600","https://openalex.org/W2895299763","https://openalex.org/W2902437806","https://openalex.org/W2904988869","https://openalex.org/W2912673695","https://openalex.org/W2916798096","https://openalex.org/W2948838566","https://openalex.org/W2952056941","https://openalex.org/W2962785568","https://openalex.org/W2962793481","https://openalex.org/W2962963674","https://openalex.org/W2963092440","https://openalex.org/W2963168844","https://openalex.org/W2963345606","https://openalex.org/W2963435596","https://openalex.org/W2963669520","https://openalex.org/W2965128575","https://openalex.org/W2966687987","https://openalex.org/W2976617189","https://openalex.org/W2982559712","https://openalex.org/W2984529706","https://openalex.org/W2996520109","https://openalex.org/W2996570742","https://openalex.org/W2996781180","https://openalex.org/W3031246127","https://openalex.org/W3034218667","https://openalex.org/W3034600949","https://openalex.org/W3034667500","https://openalex.org/W3035191209","https://openalex.org/W3095754584","https://openalex.org/W3101906322","https://openalex.org/W3116250864","https://openalex.org/W3133428285","https://openalex.org/W3152733922","https://openalex.org/W3174394676","https://openalex.org/W3181013985","https://openalex.org/W3196049079","https://openalex.org/W3215495615","https://openalex.org/W4221166209","https://openalex.org/W4281402002","https://openalex.org/W4281632497","https://openalex.org/W4283009758","https://openalex.org/W4287828128","https://openalex.org/W4295289379","https://openalex.org/W4298157202","https://openalex.org/W4298185919","https://openalex.org/W4303441850","https://openalex.org/W4308233907","https://openalex.org/W4310695675","https://openalex.org/W6631190155","https://openalex.org/W6677477928","https://openalex.org/W6712884540","https://openalex.org/W6718379498","https://openalex.org/W6735992252","https://openalex.org/W6745193426","https://openalex.org/W6745420753","https://openalex.org/W6748392304","https://openalex.org/W6749916090","https://openalex.org/W6750642828","https://openalex.org/W6756789066","https://openalex.org/W6761540984","https://openalex.org/W6763239785","https://openalex.org/W6765779288","https://openalex.org/W6767264202","https://openalex.org/W6767384525","https://openalex.org/W6768115207","https://openalex.org/W6771703261","https://openalex.org/W6774728187","https://openalex.org/W6793801364","https://openalex.org/W6797331426","https://openalex.org/W6810264703","https://openalex.org/W6810793953","https://openalex.org/W6838032239","https://openalex.org/W6838785959","https://openalex.org/W6840802339","https://openalex.org/W6844305113","https://openalex.org/W6846135447"],"related_works":["https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W2142795561","https://openalex.org/W2055243143","https://openalex.org/W4205302943","https://openalex.org/W2561132942","https://openalex.org/W3155418658","https://openalex.org/W4243199227","https://openalex.org/W2312533462"],"abstract_inverted_index":{"Most":[0],"of":[1,24,31,121,151,176],"the":[2,29,74,91,97,156,174,184],"existing":[3],"studies":[4],"on":[5,35,61,155,163],"controllable":[6],"video":[7],"generation":[8],"either":[9],"transfer":[10],"disentangled":[11],"motion":[12,20,92],"to":[13,72],"an":[14,114],"appearance":[15],"without":[16],"detailed":[17,82],"control":[18,37,57,83,103,111,127,159,185],"over":[19,58,186],"or":[21],"generate":[22,143],"videos":[23,66,147,181],"simple":[25,122],"actions":[26,117],"such":[27],"as":[28,118,148],"movement":[30],"arbitrary":[32],"objects":[33,71],"conditioned":[34,154],"a":[36,62,119,149],"signal":[38,104,112],"from":[39,87],"users.":[40,88],"In":[41,110,136],"this":[42],"study,":[43],"we":[44,95],"introduce":[45],"Controllable":[46],"Video":[47],"Generation":[48],"with":[49,70,81,132],"text-based":[50,56,85,130],"Instructions":[51],"(CVGI)":[52],"framework":[53],"that":[54],"allows":[55],"action":[59,76,108,137,146],"performed":[60],"video.":[63],"CVGI":[64,177],"generates":[65],"where":[67],"hands":[68],"interact":[69],"perform":[73],"desired":[75],"by":[77,124],"generating":[78,179],"hand":[79,152],"motions":[80,123,153],"through":[84],"instruction":[86],"By":[89],"incorporating":[90],"estimation":[93,105],"layer,":[94],"divide":[96],"task":[98],"into":[99],"two":[100],"sub-tasks:":[101],"(1)":[102],"and":[106,170,182],"(2)":[107],"generation.":[109],"estimation,":[113],"encoder":[115],"models":[116],"set":[120],"estimating":[125],"low-level":[126],"signals":[128],"for":[129],"instructions":[131],"given":[133],"initial":[134],"frames.":[135],"generation,":[138],"generative":[139],"adversarial":[140],"networks":[141],"(GANs)":[142],"realistic":[144,180],"hand-based":[145],"combination":[150],"estimated":[157],"low":[158],"level":[160],"signal.":[161],"Evaluations":[162],"several":[164],"datasets":[165],"(EPIC-Kitchens-55,":[166],"BAIR":[167],"robot":[168],"pushing,":[169],"Atari":[171],"Breakout)":[172],"show":[173],"effectiveness":[175],"in":[178,183],"actions.":[187]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
