{"id":"https://openalex.org/W4386083157","doi":"https://doi.org/10.1109/cvpr52729.2023.02192","title":"Towards End-to-End Generative Modeling of Long Videos with Memory-Efficient Bidirectional Transformers","display_name":"Towards End-to-End Generative Modeling of Long Videos with Memory-Efficient Bidirectional Transformers","publication_year":2023,"publication_date":"2023-06-01","ids":{"openalex":"https://openalex.org/W4386083157","doi":"https://doi.org/10.1109/cvpr52729.2023.02192"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52729.2023.02192","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52729.2023.02192","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101381937","display_name":"Jaehoon Yoo","orcid":null},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]},{"id":"https://openalex.org/I4210099236","display_name":"Kootenay Association for Science & Technology","ror":"https://ror.org/011pv9p44","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210099236"]}],"countries":["CA","KR"],"is_corresponding":true,"raw_author_name":"Jaehoon Yoo","raw_affiliation_strings":["KAIST"],"affiliations":[{"raw_affiliation_string":"KAIST","institution_ids":["https://openalex.org/I4210099236","https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102014070","display_name":"Semin Kim","orcid":"https://orcid.org/0000-0003-3746-0863"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]},{"id":"https://openalex.org/I4210099236","display_name":"Kootenay Association for Science & Technology","ror":"https://ror.org/011pv9p44","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210099236"]}],"countries":["CA","KR"],"is_corresponding":false,"raw_author_name":"Semin Kim","raw_affiliation_strings":["KAIST"],"affiliations":[{"raw_affiliation_string":"KAIST","institution_ids":["https://openalex.org/I4210099236","https://openalex.org/I157485424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032607805","display_name":"Doyup Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113520","display_name":"Brain (Germany)","ror":"https://ror.org/01gamcy45","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210113520"]},{"id":"https://openalex.org/I207623266","display_name":"Kao Corporation (Japan)","ror":"https://ror.org/016t1kc57","country_code":"JP","type":"company","lineage":["https://openalex.org/I207623266"]}],"countries":["DE","JP"],"is_corresponding":false,"raw_author_name":"Doyup Lee","raw_affiliation_strings":["Kakao Brain"],"affiliations":[{"raw_affiliation_string":"Kakao Brain","institution_ids":["https://openalex.org/I207623266","https://openalex.org/I4210113520"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070355114","display_name":"Chiheon Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113520","display_name":"Brain (Germany)","ror":"https://ror.org/01gamcy45","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210113520"]},{"id":"https://openalex.org/I207623266","display_name":"Kao Corporation (Japan)","ror":"https://ror.org/016t1kc57","country_code":"JP","type":"company","lineage":["https://openalex.org/I207623266"]}],"countries":["DE","JP"],"is_corresponding":false,"raw_author_name":"Chiheon Kim","raw_affiliation_strings":["Kakao Brain"],"affiliations":[{"raw_affiliation_string":"Kakao Brain","institution_ids":["https://openalex.org/I207623266","https://openalex.org/I4210113520"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077461583","display_name":"Seunghoon Hong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113520","display_name":"Brain (Germany)","ror":"https://ror.org/01gamcy45","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210113520"]},{"id":"https://openalex.org/I207623266","display_name":"Kao Corporation (Japan)","ror":"https://ror.org/016t1kc57","country_code":"JP","type":"company","lineage":["https://openalex.org/I207623266"]}],"countries":["DE","JP"],"is_corresponding":false,"raw_author_name":"Seunghoon Hong","raw_affiliation_strings":["Kakao Brain"],"affiliations":[{"raw_affiliation_string":"Kakao Brain","institution_ids":["https://openalex.org/I207623266","https://openalex.org/I4210113520"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101381937"],"corresponding_institution_ids":["https://openalex.org/I157485424","https://openalex.org/I4210099236"],"apc_list":null,"apc_paid":null,"fwci":0.369,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.59694074,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"22888","last_page":"22897"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7876039743423462},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6911470890045166},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.662025511264801},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6113266944885254},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.5666656494140625},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5258094668388367},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4566141963005066},{"id":"https://openalex.org/keywords/dependency-grammar","display_name":"Dependency grammar","score":0.4424593448638916},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4258647561073303},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3228413462638855},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3221389651298523},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.19448253512382507},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09913372993469238}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7876039743423462},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6911470890045166},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.662025511264801},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6113266944885254},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.5666656494140625},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5258094668388367},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4566141963005066},{"id":"https://openalex.org/C164883195","wikidata":"https://www.wikidata.org/wiki/Q674834","display_name":"Dependency grammar","level":3,"score":0.4424593448638916},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4258647561073303},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3228413462638855},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3221389651298523},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.19448253512382507},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09913372993469238},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52729.2023.02192","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52729.2023.02192","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":97,"referenced_works":["https://openalex.org/W24089286","https://openalex.org/W1522734439","https://openalex.org/W2619947201","https://openalex.org/W2752796333","https://openalex.org/W2896457183","https://openalex.org/W2902437806","https://openalex.org/W2908510526","https://openalex.org/W2940988007","https://openalex.org/W2949099979","https://openalex.org/W2953273646","https://openalex.org/W2960404386","https://openalex.org/W2963092440","https://openalex.org/W2963373786","https://openalex.org/W2963799213","https://openalex.org/W2964245526","https://openalex.org/W2964303162","https://openalex.org/W2964318715","https://openalex.org/W2964327849","https://openalex.org/W2991019415","https://openalex.org/W2994903658","https://openalex.org/W2997571320","https://openalex.org/W2998108143","https://openalex.org/W3008823916","https://openalex.org/W3015468748","https://openalex.org/W3031246127","https://openalex.org/W3033529678","https://openalex.org/W3101906322","https://openalex.org/W3132648081","https://openalex.org/W3133405188","https://openalex.org/W3133428285","https://openalex.org/W3152733922","https://openalex.org/W3154010627","https://openalex.org/W3158432584","https://openalex.org/W3166584442","https://openalex.org/W3169012807","https://openalex.org/W3169064633","https://openalex.org/W3170082999","https://openalex.org/W3174394676","https://openalex.org/W3174902251","https://openalex.org/W3180355996","https://openalex.org/W3181942264","https://openalex.org/W3183681282","https://openalex.org/W3190965961","https://openalex.org/W3215495615","https://openalex.org/W4221163275","https://openalex.org/W4226053793","https://openalex.org/W4281632497","https://openalex.org/W4281853241","https://openalex.org/W4283449034","https://openalex.org/W4287667694","https://openalex.org/W4287704453","https://openalex.org/W4288095134","https://openalex.org/W4294037149","https://openalex.org/W4298157202","https://openalex.org/W4312423208","https://openalex.org/W4312546849","https://openalex.org/W4312633146","https://openalex.org/W4313021454","https://openalex.org/W4313156423","https://openalex.org/W6600983433","https://openalex.org/W6718379498","https://openalex.org/W6726983635","https://openalex.org/W6735992252","https://openalex.org/W6741985061","https://openalex.org/W6744881120","https://openalex.org/W6748392304","https://openalex.org/W6750642828","https://openalex.org/W6755207826","https://openalex.org/W6755477022","https://openalex.org/W6756789066","https://openalex.org/W6757817989","https://openalex.org/W6765529972","https://openalex.org/W6767264202","https://openalex.org/W6767796949","https://openalex.org/W6771703261","https://openalex.org/W6772853553","https://openalex.org/W6776048684","https://openalex.org/W6781533629","https://openalex.org/W6787802714","https://openalex.org/W6789900275","https://openalex.org/W6790720088","https://openalex.org/W6790830454","https://openalex.org/W6791098498","https://openalex.org/W6791314693","https://openalex.org/W6792601966","https://openalex.org/W6796753453","https://openalex.org/W6797139422","https://openalex.org/W6797331426","https://openalex.org/W6799838802","https://openalex.org/W6810170346","https://openalex.org/W6810226158","https://openalex.org/W6811288566","https://openalex.org/W6838638105","https://openalex.org/W6838785959","https://openalex.org/W6839024538","https://openalex.org/W6840802339","https://openalex.org/W6955071965"],"related_works":["https://openalex.org/W3179968364","https://openalex.org/W2171218219","https://openalex.org/W1972271943","https://openalex.org/W2150410159","https://openalex.org/W4327525404","https://openalex.org/W4287185323","https://openalex.org/W3150905897","https://openalex.org/W1999612375","https://openalex.org/W1520183331","https://openalex.org/W2734842993"],"abstract_inverted_index":{"Autoregressive":[0],"transformers":[1,11,140],"have":[2],"shown":[3],"remarkable":[4],"success":[5],"in":[6,20,59,68,83,97,146],"video":[7,82],"generation.":[8],"However,":[9],"the":[10,17,24,41,76,119,123,138],"are":[12,154],"prohibited":[13],"from":[14,32,85],"directly":[15],"learning":[16,55],"longterm":[18,57],"dependency":[19,58],"videos":[21,60,145],"due":[22,39],"to":[23,40,74,117],"quadratic":[25],"complexity":[26,96,128],"of":[27,56,80,111],"self-attention,":[28],"and":[29,36,61,100,114,129,149,152],"inherently":[30],"suffering":[31],"slow":[33],"inference":[34],"time":[35,95],"error":[37],"propagation":[38],"autoregressive":[42,139],"process.":[43],"In":[44],"this":[45],"paper,":[46],"we":[47],"propose":[48],"Memory-efficient":[49],"Bidirectional":[50],"Transformer":[51],"(MeBT)":[52],"for":[53,141],"end-to-end":[54],"fast":[62],"inference.":[63],"Based":[64],"on":[65],"recent":[66],"advances":[67],"bidirectional":[69,130],"transformers,":[70],"our":[71,132],"method":[72,133],"learns":[73],"decode":[75,118],"entire":[77],"spatio-temporal":[78],"volume":[79],"a":[81,93,108],"parallel":[84],"partially":[86],"observed":[87],"patches.":[88],"The":[89],"proposed":[90],"transformer":[91],"achieves":[92],"linear":[94,127],"both":[98,147],"encoding":[99],"decoding,":[101],"by":[102,126],"projecting":[103],"observable":[104],"context":[105],"tokens":[106,113,121],"into":[107],"fixed":[109],"number":[110],"latent":[112],"conditioning":[115],"them":[116],"masked":[120],"through":[122],"cross-attention.":[124],"Empowered":[125],"modeling,":[131],"demonstrates":[134],"significant":[135],"improvement":[136],"over":[137],"generating":[142],"moderately":[143],"long":[144],"quality":[148],"speed.":[150],"Videos":[151],"code":[153],"available":[155],"at":[156],"https://sites.google.com/view/mebt-cvpr2023.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
