{"id":"https://openalex.org/W4400578896","doi":"https://doi.org/10.1109/tcsvt.2024.3427426","title":"MaskCRT: Masked Conditional Residual Transformer for Learned Video Compression","display_name":"MaskCRT: Masked Conditional Residual Transformer for Learned Video Compression","publication_year":2024,"publication_date":"2024-07-12","ids":{"openalex":"https://openalex.org/W4400578896","doi":"https://doi.org/10.1109/tcsvt.2024.3427426"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3427426","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3427426","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003464674","display_name":"Yi-Hsin Chen","orcid":"https://orcid.org/0000-0002-8310-5718"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Yi-Hsin Chen","raw_affiliation_strings":["Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046117157","display_name":"Hong-Sheng Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hong-Sheng Xie","raw_affiliation_strings":["Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003413542","display_name":"Cheng\u2010Wei Chen","orcid":"https://orcid.org/0000-0003-4807-3340"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Cheng-Wei Chen","raw_affiliation_strings":["Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109669170","display_name":"Zong-Lin Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Zong-Lin Gao","raw_affiliation_strings":["Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091312350","display_name":"Martin Benjak","orcid":null},"institutions":[{"id":"https://openalex.org/I4210088543","display_name":"Institut f\u00fcr Informationsverarbeitung","ror":"https://ror.org/0047j9t38","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210088543"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Martin Benjak","raw_affiliation_strings":["Institut f&#x00FC;r Informationsverarbeitung, Leibniz Universit&#x00E4;t Hannover, Hannover, Germany"],"affiliations":[{"raw_affiliation_string":"Institut f&#x00FC;r Informationsverarbeitung, Leibniz Universit&#x00E4;t Hannover, Hannover, Germany","institution_ids":["https://openalex.org/I4210088543"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071531458","display_name":"Wen-Hsiao Peng","orcid":"https://orcid.org/0000-0002-4421-8031"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wen-Hsiao Peng","raw_affiliation_strings":["Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Yang Ming Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064913233","display_name":"J\u00f6rn \u00d6stermann","orcid":"https://orcid.org/0000-0002-6743-3324"},"institutions":[{"id":"https://openalex.org/I4210088543","display_name":"Institut f\u00fcr Informationsverarbeitung","ror":"https://ror.org/0047j9t38","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210088543"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"J\u00f6rn Ostermann","raw_affiliation_strings":["Institut f&#x00FC;r Informationsverarbeitung, Leibniz Universit&#x00E4;t Hannover, Hannover, Germany"],"affiliations":[{"raw_affiliation_string":"Institut f&#x00FC;r Informationsverarbeitung, Leibniz Universit&#x00E4;t Hannover, Hannover, Germany","institution_ids":["https://openalex.org/I4210088543"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5003464674"],"corresponding_institution_ids":["https://openalex.org/I148366613"],"apc_list":null,"apc_paid":null,"fwci":5.6914,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.97034188,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"34","issue":"11","first_page":"11980","last_page":"11992"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13579","display_name":"Image and Video Stabilization","score":0.9805999994277954,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.6651589274406433},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6138095855712891},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5767836570739746},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.43113037943840027},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40677085518836975},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.37808430194854736},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2409524917602539},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.1003967821598053},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.0932789146900177},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.09061682224273682}],"concepts":[{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.6651589274406433},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6138095855712891},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5767836570739746},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.43113037943840027},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40677085518836975},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.37808430194854736},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2409524917602539},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.1003967821598053},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0932789146900177},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.09061682224273682}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3427426","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3427426","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.44999998807907104,"display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G2404885208","display_name":null,"funder_award_id":"112-2634-F-A49-007-","funder_id":"https://openalex.org/F4320331164","funder_display_name":"National Science and Technology Council"},{"id":"https://openalex.org/G4961388589","display_name":null,"funder_award_id":"111-2923-E-A49-007-MY3","funder_id":"https://openalex.org/F4320331164","funder_display_name":"National Science and Technology Council"},{"id":"https://openalex.org/G7390146561","display_name":null,"funder_award_id":"110-2221-E-A49-065-MY3","funder_id":"https://openalex.org/F4320331164","funder_display_name":"National Science and Technology Council"}],"funders":[{"id":"https://openalex.org/F4320322410","display_name":"MediaTek","ror":"https://ror.org/05g9jck81"},{"id":"https://openalex.org/F4320331164","display_name":"National Science and Technology Council","ror":"https://ror.org/00wnb9798"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W2101700394","https://openalex.org/W2511458122","https://openalex.org/W2548527721","https://openalex.org/W2769654144","https://openalex.org/W2785562966","https://openalex.org/W2798514732","https://openalex.org/W2969260367","https://openalex.org/W2979990382","https://openalex.org/W3018065762","https://openalex.org/W3031546776","https://openalex.org/W3035195755","https://openalex.org/W3045658096","https://openalex.org/W3047936379","https://openalex.org/W3091266734","https://openalex.org/W3095479355","https://openalex.org/W3102015846","https://openalex.org/W3108139283","https://openalex.org/W3110286842","https://openalex.org/W3138516171","https://openalex.org/W3173272744","https://openalex.org/W3175457126","https://openalex.org/W3192179721","https://openalex.org/W3212865599","https://openalex.org/W4214743248","https://openalex.org/W4223425316","https://openalex.org/W4225672218","https://openalex.org/W4226132811","https://openalex.org/W4285483958","https://openalex.org/W4308234017","https://openalex.org/W4309134821","https://openalex.org/W4312547480","https://openalex.org/W4312774595","https://openalex.org/W4312806968","https://openalex.org/W4312959373","https://openalex.org/W4313021454","https://openalex.org/W4313058111","https://openalex.org/W4313291310","https://openalex.org/W4317555229","https://openalex.org/W4319299954","https://openalex.org/W4319663704","https://openalex.org/W4372271371","https://openalex.org/W4385486412","https://openalex.org/W4386065808","https://openalex.org/W4386075611","https://openalex.org/W4386597102","https://openalex.org/W4391305740","https://openalex.org/W6754634825","https://openalex.org/W6769015554","https://openalex.org/W6802036239","https://openalex.org/W6810924147","https://openalex.org/W6839976848"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2775347418","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Conditional":[0,31],"coding":[1,25,33,51,94,107,111,196],"has":[2,61],"lately":[3],"emerged":[4],"as":[5],"the":[6,27,55,58,69,155,159,163,167,176,183,213],"mainstream":[7],"approach":[8,171],"to":[9,43,81,101,129,131,153,170,175,193,210],"learned":[10,239],"video":[11,240],"compression.":[12,241],"However,":[13],"a":[14,38,62,90,98,103,113,119,133,139,148,233],"recent":[15],"study":[16],"shows":[17,206],"that":[18,57,67,73,141],"it":[19],"may":[20],"perform":[21],"worse":[22],"than":[23,66],"residual":[24,32,50,59,93,110,189,199],"when":[26],"information":[28],"bottleneck":[29],"arises.":[30],"was":[34],"thus":[35],"proposed,":[36],"creating":[37],"new":[39,234],"school":[40],"of":[41,68,105,165,185,220,227],"thought":[42],"improve":[44],"on":[45,54],"conditional":[46,49,92,106,109,121,188,195,198],"coding.":[47,200],"Notably,":[48],"relies":[52],"heavily":[53],"assumption":[56,75],"frame":[60],"lower":[63],"entropy":[64],"rate":[65],"intra":[70],"frame.":[71],"Recognizing":[72],"this":[74],"is":[76,142],"not":[77],"always":[78],"true":[79],"due":[80],"dis-occlusion":[82],"phenomena":[83],"or":[84],"unreliable":[85],"motion":[86],"estimates,":[87],"we":[88,146],"propose":[89,147],"masked":[91,187],"scheme.":[95],"It":[96,229],"learns":[97],"soft":[99],"mask":[100],"form":[102],"hybrid":[104],"and":[108,197,222],"in":[112,218,225],"pixel":[114],"adaptive":[115],"manner.":[116],"We":[117],"introduce":[118],"Transformer-based":[120,134],"autoencoder.":[122],"Several":[123],"strategies":[124],"are":[125],"investigated":[126],"with":[127,162],"regard":[128],"how":[130],"condition":[132],"autoencoder":[135],"for":[136,237],"inter-frame":[137],"coding,":[138],"topic":[140],"largely":[143],"under-explored.":[144],"Additionally,":[145],"channel":[149,160],"transform":[150],"module":[151],"(CTM)":[152],"decorrelate":[154],"image":[156],"latents":[157],"along":[158],"dimension,":[161],"aim":[164],"using":[166],"simple":[168],"hyperprior":[169],"similar":[172],"compression":[173],"performance":[174],"channel-wise":[177],"autoregressive":[178],"model.":[179],"Experimental":[180],"results":[181,209],"confirm":[182],"superiority":[184],"our":[186],"transformer":[190],"(termed":[191],"MaskCRT)":[192],"both":[194],"On":[201],"commonly":[202],"used":[203],"datasets,":[204],"MaskCRT":[205],"comparable":[207],"BD-rate":[208],"VTM-17.0":[211,224],"under":[212],"low":[214],"delay":[215],"P":[216],"configuration":[217],"terms":[219,226],"PSNR-RGB":[221],"outperforms":[223],"MS-SSIM-RGB.":[228],"also":[230],"opens":[231],"up":[232],"research":[235],"direction":[236],"advancing":[238]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
