{"id":"https://openalex.org/W4406858929","doi":"https://doi.org/10.1109/vcip63160.2024.10849863","title":"LMM-driven Semantic Image-Text Coding for Ultra Low-bitrate Learned Image Compression","display_name":"LMM-driven Semantic Image-Text Coding for Ultra Low-bitrate Learned Image Compression","publication_year":2024,"publication_date":"2024-12-08","ids":{"openalex":"https://openalex.org/W4406858929","doi":"https://doi.org/10.1109/vcip63160.2024.10849863"},"language":"en","primary_location":{"id":"doi:10.1109/vcip63160.2024.10849863","is_oa":false,"landing_page_url":"https://doi.org/10.1109/vcip63160.2024.10849863","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Visual Communications and Image Processing (VCIP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Shimon Murai","orcid":null},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Shimon Murai","raw_affiliation_strings":["Waseda University,School of Fundamental Science and Engineering,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"Waseda University,School of Fundamental Science and Engineering,Tokyo,Japan","institution_ids":["https://openalex.org/I150744194"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101186999","display_name":"Heming Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I180203408","display_name":"Yokohama National University","ror":"https://ror.org/03zyp6p76","country_code":"JP","type":"education","lineage":["https://openalex.org/I180203408"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Heming Sun","raw_affiliation_strings":["Yokohama National University,Faculty of Engineering,Japan"],"affiliations":[{"raw_affiliation_string":"Yokohama National University,Faculty of Engineering,Japan","institution_ids":["https://openalex.org/I180203408"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002294739","display_name":"Jiro Katto","orcid":"https://orcid.org/0000-0002-1671-2614"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jiro Katto","raw_affiliation_strings":["Waseda University,School of Fundamental Science and Engineering,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"Waseda University,School of Fundamental Science and Engineering,Tokyo,Japan","institution_ids":["https://openalex.org/I150744194"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I150744194"],"apc_list":null,"apc_paid":null,"fwci":0.7223,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.7359011,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9639999866485596,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9588000178337097,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7308916449546814},{"id":"https://openalex.org/keywords/image-compression","display_name":"Image compression","score":0.6392413377761841},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5396028161048889},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.518267810344696},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5020852088928223},{"id":"https://openalex.org/keywords/transform-coding","display_name":"Transform coding","score":0.4711814224720001},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4519680142402649},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4288822114467621},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.42187488079071045},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.4065968990325928},{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.2319001853466034},{"id":"https://openalex.org/keywords/discrete-cosine-transform","display_name":"Discrete cosine transform","score":0.12164437770843506},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11200091242790222},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.1036059558391571},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.0871698260307312}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7308916449546814},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.6392413377761841},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5396028161048889},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.518267810344696},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5020852088928223},{"id":"https://openalex.org/C169805256","wikidata":"https://www.wikidata.org/wiki/Q1361381","display_name":"Transform coding","level":4,"score":0.4711814224720001},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4519680142402649},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4288822114467621},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.42187488079071045},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.4065968990325928},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2319001853466034},{"id":"https://openalex.org/C2221639","wikidata":"https://www.wikidata.org/wiki/Q2877","display_name":"Discrete cosine transform","level":3,"score":0.12164437770843506},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11200091242790222},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.1036059558391571},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0871698260307312},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/vcip63160.2024.10849863","is_oa":false,"landing_page_url":"https://doi.org/10.1109/vcip63160.2024.10849863","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Visual Communications and Image Processing (VCIP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320325763","display_name":"Telecommunications Advancement Foundation","ror":"https://ror.org/05y77zf79"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2101700394","https://openalex.org/W2962785568","https://openalex.org/W3034469748","https://openalex.org/W3160589897","https://openalex.org/W4281986181","https://openalex.org/W4312933868","https://openalex.org/W4382462760","https://openalex.org/W4386065641","https://openalex.org/W4398188163","https://openalex.org/W4404199570","https://openalex.org/W4404533930","https://openalex.org/W4405844682","https://openalex.org/W6765779288","https://openalex.org/W6780365925","https://openalex.org/W6791353385","https://openalex.org/W6850625674","https://openalex.org/W6851592950","https://openalex.org/W6853187832","https://openalex.org/W6853558715","https://openalex.org/W6856712200","https://openalex.org/W6857419123"],"related_works":["https://openalex.org/W2521595930","https://openalex.org/W4243608781","https://openalex.org/W3165542721","https://openalex.org/W4313046148","https://openalex.org/W1939109514","https://openalex.org/W4378191574","https://openalex.org/W2129829718","https://openalex.org/W1843792225","https://openalex.org/W2751842002","https://openalex.org/W2161981399"],"abstract_inverted_index":{"Supported":[0],"by":[1,31],"powerful":[2],"generative":[3],"models,":[4],"low-bitrate":[5],"learned":[6],"image":[7,33],"compression":[8,25],"(LIC)":[9],"models":[10,22],"utilizing":[11],"perceptual":[12,29],"metrics":[13],"have":[14],"become":[15],"feasible.":[16],"Some":[17],"of":[18],"the":[19],"most":[20],"advanced":[21],"achieve":[23],"high":[24],"rates":[26],"and":[27,53,87],"superior":[28],"quality":[30],"using":[32,41],"captions":[34,52],"as":[35],"sub-information.":[36],"This":[37],"paper":[38],"demonstrates":[39],"that":[40],"a":[42,57,63,75],"large":[43],"multi-modal":[44],"model":[45],"(LMM),":[46],"it":[47],"is":[48],"possible":[49],"to":[50,69,82],"generate":[51],"compress":[54],"them":[55],"within":[56],"single":[58],"model.":[59],"We":[60],"also":[61],"propose":[62],"novel":[64],"semantic-perceptual-oriented":[65],"fine-tuning":[66],"method":[67],"applicable":[68],"any":[70],"LIC":[71],"network,":[72],"resulting":[73],"in":[74,78],"41.58%":[76],"improvement":[77],"LPIPS":[79],"BD-rate":[80],"compared":[81],"existing":[83],"methods.":[84],"Our":[85],"implementation":[86],"pre-trained":[88],"weights":[89],"are":[90],"available":[91],"at":[92],"https://github.com/tokkiwa/ImageTextCoding.":[93]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
