{"id":"https://openalex.org/W7147325740","doi":"https://doi.org/10.1109/icaiic68212.2026.11454196","title":"Fine-Grained Rewards for Visual CoT: Mitigating Hallucinations in Vision-Language Models","display_name":"Fine-Grained Rewards for Visual CoT: Mitigating Hallucinations in Vision-Language Models","publication_year":2026,"publication_date":"2026-02-24","ids":{"openalex":"https://openalex.org/W7147325740","doi":"https://doi.org/10.1109/icaiic68212.2026.11454196"},"language":null,"primary_location":{"id":"doi:10.1109/icaiic68212.2026.11454196","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icaiic68212.2026.11454196","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on Artificial Intelligence in Information and Communication (ICAIIC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057568411","display_name":"Jimyung Park","orcid":"https://orcid.org/0000-0003-2655-5517"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jimyung Park","raw_affiliation_strings":["Yonsei University,Department of Industrial Engineering,Seoul,Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Yonsei University,Department of Industrial Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041548635","display_name":"Minhyuk Jeong","orcid":"https://orcid.org/0000-0002-2231-7039"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Minhyuk Jeong","raw_affiliation_strings":["Yonsei University,Department of Industrial Engineering,Seoul,Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Yonsei University,Department of Industrial Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132566017","display_name":"Dongjun Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Dongjun Kim","raw_affiliation_strings":["Yonsei University,Department of Industrial Engineering,Seoul,Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Yonsei University,Department of Industrial Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132561464","display_name":"Hyunjun Yuh","orcid":null},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyunjun Yuh","raw_affiliation_strings":["Yonsei University,Department of Industrial Engineering,Seoul,Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Yonsei University,Department of Industrial Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I193775966"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005103838","display_name":"Jeonghoon Mo","orcid":"https://orcid.org/0000-0001-5151-6486"},"institutions":[{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jeonghoon Mo","raw_affiliation_strings":["Yonsei University,Department of Industrial Engineering,Seoul,Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Yonsei University,Department of Industrial Engineering,Seoul,Republic of Korea","institution_ids":["https://openalex.org/I193775966"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.44243077,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"535","last_page":"540"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5008000135421753,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5008000135421753,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.23309999704360962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.04010000079870224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hallucinating","display_name":"Hallucinating","score":0.942799985408783},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5910999774932861},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.5461000204086304},{"id":"https://openalex.org/keywords/visual-hallucination","display_name":"Visual Hallucination","score":0.5449000000953674},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.36160001158714294},{"id":"https://openalex.org/keywords/preference-elicitation","display_name":"Preference elicitation","score":0.3546000123023987},{"id":"https://openalex.org/keywords/visual-perception","display_name":"Visual perception","score":0.34139999747276306},{"id":"https://openalex.org/keywords/schizophrenia","display_name":"Schizophrenia (object-oriented programming)","score":0.3303999900817871}],"concepts":[{"id":"https://openalex.org/C2911011789","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Hallucinating","level":2,"score":0.942799985408783},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5910999774932861},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.5461000204086304},{"id":"https://openalex.org/C2908998935","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Visual Hallucination","level":2,"score":0.5449000000953674},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5406000018119812},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.5209000110626221},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.48590001463890076},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.420199990272522},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38530001044273376},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.36160001158714294},{"id":"https://openalex.org/C2777868144","wikidata":"https://www.wikidata.org/wiki/Q7239817","display_name":"Preference elicitation","level":3,"score":0.3546000123023987},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.34139999747276306},{"id":"https://openalex.org/C2776412080","wikidata":"https://www.wikidata.org/wiki/Q7431605","display_name":"Schizophrenia (object-oriented programming)","level":2,"score":0.3303999900817871},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3190999925136566},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3151000142097473},{"id":"https://openalex.org/C94361409","wikidata":"https://www.wikidata.org/wiki/Q7882500","display_name":"Uncertainty reduction theory","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C2993048729","wikidata":"https://www.wikidata.org/wiki/Q220821","display_name":"Visual methods","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C2986089797","wikidata":"https://www.wikidata.org/wiki/Q6501338","display_name":"Visual attention","level":3,"score":0.2831000089645386},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.28130000829696655},{"id":"https://openalex.org/C2779321571","wikidata":"https://www.wikidata.org/wiki/Q7936605","display_name":"Visual learning","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.2563999891281128}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icaiic68212.2026.11454196","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icaiic68212.2026.11454196","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on Artificial Intelligence in Information and Communication (ICAIIC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.41592857241630554}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W4389523832","https://openalex.org/W4402670859","https://openalex.org/W4402727405"],"related_works":[],"abstract_inverted_index":{"Large":[0],"Vision-Language":[1],"Models":[2],"(LVLMs)":[3],"often":[4],"hallucinate":[5],"objects,":[6],"relations,":[7],"or":[8],"attributes":[9],"not":[10],"grounded":[11],"in":[12],"the":[13,35,92],"input":[14],"image.":[15],"Existing":[16],"approaches":[17],"such":[18],"as":[19],"cross-entropy":[20],"training":[21],"and":[22,81],"response-level":[23],"preference":[24],"optimization":[25],"(e.g.,":[26],"RLHF,":[27],"DPO)":[28],"fail":[29],"to":[30],"explicitly":[31],"target":[32],"hallucinations":[33,70],"within":[34],"reasoning":[36,100],"process.":[37],"We":[38],"propose":[39],"a":[40],"fine-grained":[41],"reinforcement":[42],"learning":[43],"framework":[44],"for":[45,97],"Visual":[46],"Chain-of-Thought":[47],"(CoT),":[48],"decomposing":[49],"responses":[50],"into":[51],"[objects]":[52],"\u2192":[53,55],"[relations]":[54],"[answer]":[56],"with":[57,87,101],"stage-specific":[58],"rewards.":[59],"Using":[60],"Group":[61],"Relative":[62],"Preference":[63],"Optimization":[64],"(GRPO),":[65],"our":[66],"method":[67],"directly":[68],"penalizes":[69],"at":[71],"each":[72],"stage":[73],"while":[74],"ensuring":[75],"stable":[76],"training.":[77],"Experiments":[78],"on":[79],"POPE":[80],"VQAv2":[82],"show":[83],"substantial":[84],"hallucination":[85],"reduction":[86],"competitive":[88],"task":[89],"performance,":[90],"demonstrating":[91],"benefit":[93],"of":[94],"stage-wise":[95],"penalization":[96],"aligning":[98],"LVLM":[99],"visual":[102],"evidence.":[103]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-02T00:00:00"}
