{"id":"https://openalex.org/W7151369220","doi":"https://doi.org/10.48550/arxiv.2604.04780","title":"CLEAR: Unlocking Generative Potential for Degraded Image Understanding in Unified Multimodal Models","display_name":"CLEAR: Unlocking Generative Potential for Degraded Image Understanding in Unified Multimodal Models","publication_year":2026,"publication_date":"2026-04-06","ids":{"openalex":"https://openalex.org/W7151369220","doi":"https://doi.org/10.48550/arxiv.2604.04780"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.04780","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04780","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.04780","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124900824","display_name":"Xiangzhao Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hao, Xiangzhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133106348","display_name":"Zefeng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zefeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133070860","display_name":"Zhenyu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhenyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113067630","display_name":"Linhao Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Linhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133110755","display_name":"Yao Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133142983","display_name":"Yiqian Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yiqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133102880","display_name":"Haiyun Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Haiyun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133131576","display_name":"Shuohuan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shuohuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133077842","display_name":"Yu Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5124900824"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.47110000252723694,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.47110000252723694,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.14149999618530273,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1251000016927719,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.7247999906539917},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6333000063896179},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5799000263214111},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.45910000801086426},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.396699994802475},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.37380000948905945},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.3626999855041504},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.3418000042438507}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7432000041007996},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.7247999906539917},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6442000269889832},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6333000063896179},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5799000263214111},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.45910000801086426},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45339998602867126},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.396699994802475},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.37380000948905945},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3626999855041504},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3418000042438507},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.31859999895095825},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.29829999804496765},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.2962999939918518},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C184408114","wikidata":"https://www.wikidata.org/wiki/Q1502022","display_name":"Generative Design","level":3,"score":0.2851000130176544},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.257099986076355}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.04780","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04780","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.04780","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04780","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Image":[0],"degradation":[1,46,161],"from":[2],"blur,":[3],"noise,":[4],"compression,":[5],"and":[6,22,81,136,150,205],"poor":[7],"illumination":[8],"severely":[9],"undermines":[10],"multimodal":[11,17,167],"understanding":[12,21],"in":[13],"real-world":[14],"settings.":[15],"Unified":[16],"models":[18,50],"that":[19,45,97,124,145,171,187,202],"combine":[20],"generation":[23,78,135,152],"within":[24],"a":[25,29,95,110,120,130,141],"single":[26],"architecture":[27],"are":[28,208],"natural":[30],"fit":[31],"for":[32],"this":[33,63],"challenge,":[34],"as":[35],"their":[36,54],"generative":[37,56],"pathway":[38,85],"can":[39],"model":[40,75],"the":[41,74,82,99,115,126],"fine-grained":[42],"visual":[43,151,195,206],"structure":[44],"destroys.":[47],"Yet":[48],"these":[49],"fail":[51],"to":[52,65,76,113,193],"leverage":[53],"own":[55],"capacity":[57],"on":[58,109,176],"degraded":[59,177],"inputs.":[60],"We":[61,92,156],"trace":[62],"disconnect":[64],"two":[66,100],"compounding":[67],"factors:":[68],"existing":[69],"training":[70],"regimes":[71],"never":[72],"ask":[73],"invoke":[77],"during":[79],"reasoning,":[80],"standard":[83,166],"decode-reencode":[84,127],"does":[86],"not":[87],"support":[88],"effective":[89],"joint":[90],"optimization.":[91],"present":[93],"CLEAR,":[94],"framework":[96],"connects":[98],"capabilities":[101],"through":[102],"three":[103,160],"progressive":[104],"steps:":[105],"(1)":[106],"supervised":[107],"fine-tuning":[108],"degradation-aware":[111],"dataset":[112],"establish":[114],"generate-then-answer":[116],"reasoning":[117,149],"pattern;":[118],"(2)":[119],"Latent":[121],"Representation":[122],"Bridge":[123],"replaces":[125],"detour":[128],"with":[129,197],"direct,":[131],"optimizable":[132],"connection":[133],"between":[134],"reasoning;":[137],"(3)":[138],"Interleaved":[139],"GRPO,":[140],"reinforcement":[142],"learning":[143],"method":[144],"jointly":[146],"optimizes":[147],"text":[148],"under":[153],"answer-correctness":[154],"rewards.":[155],"construct":[157],"MMD-Bench,":[158],"covering":[159],"severity":[162],"levels":[163],"across":[164],"six":[165],"benchmarks.":[168],"Experiments":[169],"show":[170],"CLEAR":[172],"substantially":[173],"improves":[174],"robustness":[175],"inputs":[178],"while":[179],"preserving":[180],"clean-image":[181],"performance.":[182],"Our":[183],"analysis":[184],"further":[185],"reveals":[186],"removing":[188],"pixel-level":[189],"reconstruction":[190],"supervision":[191],"leads":[192],"intermediate":[194],"states":[196],"higher":[198],"perceptual":[199],"quality,":[200],"suggesting":[201],"task-driven":[203],"optimization":[204],"quality":[207],"naturally":[209],"aligned.":[210]},"counts_by_year":[],"updated_date":"2026-04-08T06:07:18.267832","created_date":"2026-04-08T00:00:00"}
