{"id":"https://openalex.org/W4391661584","doi":"https://doi.org/10.1109/tcsvt.2024.3364153","title":"Rate-Distortion Optimized Cross Modal Compression With Multiple Domains","display_name":"Rate-Distortion Optimized Cross Modal Compression With Multiple Domains","publication_year":2024,"publication_date":"2024-02-08","ids":{"openalex":"https://openalex.org/W4391661584","doi":"https://doi.org/10.1109/tcsvt.2024.3364153"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3364153","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3364153","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016259963","display_name":"Junlong Gao","orcid":"https://orcid.org/0000-0002-8734-1021"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Junlong Gao","raw_affiliation_strings":["National Engineering Research Center of Visual Technology, School of Computer Science, Peking University, Beijing, China","School of Computer Science, National Engineering Research Center of Visual Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center of Visual Technology, School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"School of Computer Science, National Engineering Research Center of Visual Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040292663","display_name":"Chuanmin Jia","orcid":"https://orcid.org/0000-0002-7418-6245"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuanmin Jia","raw_affiliation_strings":["National Key Laboratory for Multimedia Information Processing, School of Computer Science, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Multimedia Information Processing, School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031637081","display_name":"Zhimeng Huang","orcid":"https://orcid.org/0000-0001-8026-9349"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhimeng Huang","raw_affiliation_strings":["National Engineering Research Center of Visual Technology, School of Computer Science, Peking University, Beijing, China","School of Computer Science, National Engineering Research Center of Visual Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center of Visual Technology, School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"School of Computer Science, National Engineering Research Center of Visual Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100385183","display_name":"Shanshe Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shanshe Wang","raw_affiliation_strings":["National Engineering Research Center of Visual Technology, School of Computer Science, Peking University, Beijing, China","School of Computer Science, National Engineering Research Center of Visual Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center of Visual Technology, School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"School of Computer Science, National Engineering Research Center of Visual Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039832462","display_name":"Siwei Ma","orcid":"https://orcid.org/0000-0002-2731-5403"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siwei Ma","raw_affiliation_strings":["National Engineering Research Center of Visual Technology, School of Computer Science, Peking University, Beijing, China","School of Computer Science, National Engineering Research Center of Visual Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center of Visual Technology, School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"School of Computer Science, National Engineering Research Center of Visual Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018478553","display_name":"Wen Gao","orcid":"https://orcid.org/0000-0002-8070-802X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Gao","raw_affiliation_strings":["National Engineering Research Center of Visual Technology, School of Computer Science, Peking University, Beijing, China","School of Computer Science, National Engineering Research Center of Visual Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Engineering Research Center of Visual Technology, School of Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"School of Computer Science, National Engineering Research Center of Visual Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5016259963"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.7479,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.688451,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"34","issue":"8","first_page":"6978","last_page":"6992"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.764482319355011},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.5738264918327332},{"id":"https://openalex.org/keywords/image-compression","display_name":"Image compression","score":0.5391933917999268},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.5266894102096558},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5189864039421082},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4958525598049164},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.44719433784484863},{"id":"https://openalex.org/keywords/data-compression-ratio","display_name":"Data compression ratio","score":0.4308859705924988},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.383627325296402},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3808833062648773},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.35810381174087524},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3209730386734009},{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.2150624692440033}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.764482319355011},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.5738264918327332},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.5391933917999268},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.5266894102096558},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5189864039421082},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4958525598049164},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.44719433784484863},{"id":"https://openalex.org/C94835093","wikidata":"https://www.wikidata.org/wiki/Q3113333","display_name":"Data compression ratio","level":5,"score":0.4308859705924988},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.383627325296402},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3808833062648773},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35810381174087524},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3209730386734009},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2150624692440033},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3364153","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3364153","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1558520778","display_name":null,"funder_award_id":"62101007","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3833898155","display_name":null,"funder_award_id":"62088102","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6004317203","display_name":null,"funder_award_id":"62371008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7903788244","display_name":null,"funder_award_id":"62025101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1797268635","https://openalex.org/W1889081078","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W2060108852","https://openalex.org/W2073501560","https://openalex.org/W2119717200","https://openalex.org/W2145023731","https://openalex.org/W2152281536","https://openalex.org/W2277195237","https://openalex.org/W2340897893","https://openalex.org/W2506483933","https://openalex.org/W2619383789","https://openalex.org/W2785562966","https://openalex.org/W2903617461","https://openalex.org/W2962918138","https://openalex.org/W2963084599","https://openalex.org/W2963758027","https://openalex.org/W2963966654","https://openalex.org/W2980998394","https://openalex.org/W3034469748","https://openalex.org/W3082548248","https://openalex.org/W3153469116","https://openalex.org/W3180355996","https://openalex.org/W3198139956","https://openalex.org/W3205687405","https://openalex.org/W4241071816","https://openalex.org/W4246233237","https://openalex.org/W4288083516","https://openalex.org/W4308244910","https://openalex.org/W4312933868","https://openalex.org/W4386322119","https://openalex.org/W4387436807","https://openalex.org/W4390874575","https://openalex.org/W6638319203","https://openalex.org/W6639432524","https://openalex.org/W6678262379","https://openalex.org/W6685322675","https://openalex.org/W6754634825","https://openalex.org/W6755207826","https://openalex.org/W6765779288","https://openalex.org/W6791353385","https://openalex.org/W6839015040","https://openalex.org/W6846556436"],"related_works":["https://openalex.org/W3206274587","https://openalex.org/W2521595930","https://openalex.org/W3165542721","https://openalex.org/W4313046148","https://openalex.org/W1939109514","https://openalex.org/W4378191574","https://openalex.org/W2129829718","https://openalex.org/W1843792225","https://openalex.org/W4243608781","https://openalex.org/W2387043686"],"abstract_inverted_index":{"Cross-modal":[0],"compression":[1,42,182,191,196],"(CMC)":[2],"aims":[3],"to":[4,19,39,104,154,185],"compress":[5],"highly":[6],"redundant":[7],"visual":[8],"data":[9],"into":[10,67],"compact,":[11],"common,":[12],"and":[13,34,79,111,136,188,198],"human-comprehensible":[14],"domains,":[15,106],"such":[16],"as":[17],"text,":[18],"preserve":[20],"semantic":[21,32,36,118,131,171,181],"fidelity.":[22],"However,":[23],"CMC":[24,59],"is":[25],"limited":[26],"by":[27],"a":[28,40,52,72,81,122,200],"constant":[29],"level":[30],"of":[31,93,117,133,143,170],"fidelity":[33,37,132],"constrained":[35],"due":[38],"single":[41,102],"domain":[43,103],"(plain":[44],"text).":[45],"To":[46,113],"address":[47],"these":[48],"issues,":[49],"we":[50,99,120],"propose":[51,148],"new":[53],"approach":[54],"called":[55],"Multiple-domains":[56],"rate-distortion":[57,123],"optimized":[58],"(M-CMC).":[60],"Specifically,":[61],"our":[62],"method":[63,166],"divides":[64],"the":[65,101,127,130,137,140,144,156,164],"image":[66,190,206],"two":[68],"complementary":[69],"representations:":[70],"(1)":[71],"structure":[73],"representation":[74,83],"with":[75,84,193],"an":[76],"edge":[77,108],"map,":[78],"(2)":[80],"texture":[82],"dense":[85],"captions,":[86],"which":[87],"include":[88],"numerous":[89],"region-caption":[90],"pairs":[91],"instead":[92],"plain":[94],"text.":[95,112,145],"In":[96],"this":[97],"way,":[98],"expand":[100],"multiple":[105],"namely,":[107],"maps,":[109],"regions,":[110],"achieve":[114],"diverse":[115,168],"levels":[116,169],"fidelity,":[119],"suggest":[121],"reward":[124,157],"function,":[125],"where":[126],"distortion":[128],"measures":[129,139],"reconstructed":[134],"images":[135],"rate":[138],"information":[141],"content":[142],"We":[146],"also":[147],"Multiple-stage":[149],"Self-Critical":[150],"Sequence":[151],"Training":[152],"(MSCST)":[153],"optimize":[155],"function.":[158],"Extensive":[159],"experimental":[160],"results":[161],"demonstrate":[162],"that":[163],"proposed":[165],"achieves":[167,179],"translation":[172],"more":[173],"effectively":[174],"than":[175],"other":[176],"CMC-based":[177],"methods,":[178],"higher":[180],"performance":[183],"compared":[184],"traditional":[186],"block-based":[187],"learning-based":[189],"frameworks":[192],"97,000-500":[194],"times":[195],"ratio,":[197],"provides":[199],"simple":[201],"yet":[202],"effective":[203],"way":[204],"for":[205],"editing.":[207]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
