{"id":"https://openalex.org/W4414170188","doi":"https://doi.org/10.1109/avss65446.2025.11149953","title":"Addressing Illiteracy of Vision-Language Model in Underrepresented Language Through Image-Text Mix Augmentation Scheme","display_name":"Addressing Illiteracy of Vision-Language Model in Underrepresented Language Through Image-Text Mix Augmentation Scheme","publication_year":2025,"publication_date":"2025-08-11","ids":{"openalex":"https://openalex.org/W4414170188","doi":"https://doi.org/10.1109/avss65446.2025.11149953"},"language":"en","primary_location":{"id":"doi:10.1109/avss65446.2025.11149953","is_oa":false,"landing_page_url":"https://doi.org/10.1109/avss65446.2025.11149953","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Advanced Visual and Signal-Based Systems (AVSS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101797435","display_name":"Seungju Lee","orcid":"https://orcid.org/0000-0002-7384-9158"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Seungju Lee","raw_affiliation_strings":["Samsung Software Academy for Youth,South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Software Academy for Youth,South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100710726","display_name":"Heejung Kim","orcid":"https://orcid.org/0000-0001-6215-5639"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Heejung Kim","raw_affiliation_strings":["Samsung Software Academy for Youth,South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Software Academy for Youth,South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048380146","display_name":"Jongwon Seo","orcid":"https://orcid.org/0000-0002-7310-7218"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jongwon Seo","raw_affiliation_strings":["Samsung Software Academy for Youth,South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Software Academy for Youth,South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101823798","display_name":"Minwook Kim","orcid":"https://orcid.org/0000-0002-8979-9976"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Minwook Kim","raw_affiliation_strings":["Samsung Software Academy for Youth,South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Software Academy for Youth,South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100661012","display_name":"Seoyoung Lee","orcid":"https://orcid.org/0000-0002-1534-9411"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Seoyoung Lee","raw_affiliation_strings":["Samsung Software Academy for Youth,South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Software Academy for Youth,South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013916121","display_name":"Won-Chul Shin","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"WonChul Shin","raw_affiliation_strings":["Samsung Software Academy for Youth,South Korea"],"affiliations":[{"raw_affiliation_string":"Samsung Software Academy for Youth,South Korea","institution_ids":["https://openalex.org/I2250650973"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112677113","display_name":"Sunoh Kim","orcid":"https://orcid.org/0000-0003-4336-4851"},"institutions":[{"id":"https://openalex.org/I89015989","display_name":"Dankook University","ror":"https://ror.org/058pdbn81","country_code":"KR","type":"education","lineage":["https://openalex.org/I89015989"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sunoh Kim","raw_affiliation_strings":["Computer Engineering, Dankook University,South Korea"],"affiliations":[{"raw_affiliation_string":"Computer Engineering, Dankook University,South Korea","institution_ids":["https://openalex.org/I89015989"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101797435"],"corresponding_institution_ids":["https://openalex.org/I2250650973"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25430834,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.7939000129699707},{"id":"https://openalex.org/keywords/functional-illiteracy","display_name":"Functional illiteracy","score":0.7001000046730042},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6025000214576721},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.44589999318122864},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4090999960899353},{"id":"https://openalex.org/keywords/product","display_name":"Product (mathematics)","score":0.3075000047683716}],"concepts":[{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.7939000129699707},{"id":"https://openalex.org/C165205368","wikidata":"https://www.wikidata.org/wiki/Q152545","display_name":"Functional illiteracy","level":2,"score":0.7001000046730042},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6279000043869019},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6025000214576721},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5101000070571899},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5041000247001648},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.44589999318122864},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4090999960899353},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.3075000047683716},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.28630000352859497},{"id":"https://openalex.org/C2987933465","wikidata":"https://www.wikidata.org/wiki/Q141130","display_name":"Image manipulation","level":3,"score":0.2782000005245209},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2694000005722046},{"id":"https://openalex.org/C2989087649","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Image synthesis","level":3,"score":0.26440000534057617},{"id":"https://openalex.org/C547764534","wikidata":"https://www.wikidata.org/wiki/Q8236","display_name":"Literacy","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/avss65446.2025.11149953","is_oa":false,"landing_page_url":"https://doi.org/10.1109/avss65446.2025.11149953","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Advanced Visual and Signal-Based Systems (AVSS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W153185079","https://openalex.org/W2095905764","https://openalex.org/W2145023731","https://openalex.org/W2746314669","https://openalex.org/W2992308087","https://openalex.org/W3168867926","https://openalex.org/W4310609805","https://openalex.org/W4366330503","https://openalex.org/W4367367040","https://openalex.org/W4378942464","https://openalex.org/W4390041933","https://openalex.org/W4399491901","https://openalex.org/W4402713111","https://openalex.org/W4403624688","https://openalex.org/W4404783537","https://openalex.org/W4406489046","https://openalex.org/W4410536678"],"related_works":[],"abstract_inverted_index":{"Recently,":[0],"open-source":[1,16],"large":[2],"Vision-Language":[3],"Models":[4],"(VLMs)":[5],"have":[6],"progressed":[7],"toward":[8],"achieving":[9],"performance":[10,165],"comparable":[11],"to":[12,19,43,119,147,149],"closed-source":[13],"VLMs.":[14],"However,":[15],"VLMs":[17],"struggle":[18],"recognize":[20],"unfamiliar":[21],"texts":[22,29,130],"depicted":[23],"in":[24,32,131,167],"the":[25,28,48,67,105,116,128,132,157],"images,":[26],"where":[27],"are":[30],"written":[31],"underrepresented":[33,49,68],"languages":[34,69],"like":[35],"Korean.":[36],"This":[37],"illiteracy":[38],"problem":[39],"is":[40,108,125],"primarily":[41],"due":[42],"insufficient":[44],"training":[45],"data":[46,65,142],"for":[47,66],"languages.":[50],"To":[51],"address":[52],"this":[53],"problem,":[54],"we":[55],"propose":[56],"a":[57,78,86,94,96,98,101,113,121,137,163],"novel":[58],"augmentation":[59],"scheme":[60,75],"that":[61],"generates":[62],"large-scale":[63],"image":[64,80,88,107,118],"with":[70,85,127,139],"minimal":[71],"manual":[72],"annotations.":[73],"Our":[74],"synthetically":[76],"combines":[77],"text":[79,106,133,151,168],"depicting":[81],"words":[82],"or":[83],"sentences":[84],"template":[87,117],"containing":[89],"textual":[90],"contexts,":[91],"such":[92],"as":[93],"receipt,":[95],"sign,":[97],"book,":[99],"and":[100,110],"product":[102],"label.":[103],"Specifically,":[104],"cut":[109],"pasted":[111],"into":[112],"patch":[114],"of":[115,159],"generate":[120],"synthetic":[122,141],"image,":[123],"which":[124],"labeled":[126],"corresponding":[129],"image.":[134],"Therefore,":[135],"fine-tuning":[136],"VLM":[138],"our":[140,160],"can":[143],"enhance":[144],"its":[145],"ability":[146],"generalize":[148],"real-world":[150],"recognition":[152],"tasks.":[153],"Experimental":[154],"results":[155],"demonstrate":[156],"effectiveness":[158],"scheme,":[161],"showing":[162],"significant":[164],"improvement":[166],"recognition.":[169]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
