{"id":"https://openalex.org/W7137985266","doi":"https://doi.org/10.1609/aaai.v40i27.39387","title":"Refine-IQA: Multi-Stage Reinforcement Finetuning for Perceptual Image Quality Assessment","display_name":"Refine-IQA: Multi-Stage Reinforcement Finetuning for Perceptual Image Quality Assessment","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137985266","doi":"https://doi.org/10.1609/aaai.v40i27.39387"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i27.39387","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39387","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i27.39387","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129679225","display_name":"Ziheng Jia","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ziheng Jia","raw_affiliation_strings":["Shanghai Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiaotong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036738962","display_name":"Jiaying Qian","orcid":"https://orcid.org/0000-0002-8783-4942"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaying Qian","raw_affiliation_strings":["Shanghai Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiaotong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129646250","display_name":"Zicheng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zicheng Zhang","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129669303","display_name":"Zijian Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zijian Chen","raw_affiliation_strings":["Shanghai Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiaotong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129716063","display_name":"Xiongkuo Min","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiongkuo Min","raw_affiliation_strings":["Shanghai Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiaotong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5129679225"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23175966,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"27","first_page":"22301","last_page":"22309"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.9563000202178955,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.9563000202178955,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.023600000888109207,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.005799999926239252,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6632999777793884},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.6474999785423279},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6381999850273132},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5863999724388123},{"id":"https://openalex.org/keywords/image-quality","display_name":"Image quality","score":0.5755000114440918},{"id":"https://openalex.org/keywords/visual-perception","display_name":"Visual perception","score":0.43160000443458557},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.40689998865127563},{"id":"https://openalex.org/keywords/human-visual-system-model","display_name":"Human visual system model","score":0.3930000066757202}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7900999784469604},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6632999777793884},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.6474999785423279},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6381999850273132},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6363999843597412},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5863999724388123},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.5755000114440918},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4505000114440918},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.43160000443458557},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.40689998865127563},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.3930000066757202},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.3472000062465668},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C2779346075","wikidata":"https://www.wikidata.org/wiki/Q7268763","display_name":"Quality Score","level":3,"score":0.31450000405311584},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.310699999332428},{"id":"https://openalex.org/C2986089797","wikidata":"https://www.wikidata.org/wiki/Q6501338","display_name":"Visual attention","level":3,"score":0.301800012588501},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.30000001192092896},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C2778251979","wikidata":"https://www.wikidata.org/wiki/Q7936617","display_name":"Visual processing","level":3,"score":0.26669999957084656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i27.39387","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39387","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i27.39387","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39387","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"fine-tuning":[1],"(RFT)":[2],"is":[3,16],"a":[4,137,165],"proliferating":[5],"paradigm":[6,163],"for":[7,47,143],"LMM":[8],"training.":[9],"Analogous":[10],"to":[11,19,37,86,121],"high-level":[12],"reasoning":[13],"tasks,":[14],"RFT":[15,93,114],"similarly":[17],"applicable":[18],"low-level":[20,73],"vision":[21],"domains,":[22],"including":[23],"image":[24],"quality":[25,75,126,132,179],"assessment":[26],"(IQA).":[27],"Existing":[28],"RFT-based":[29],"IQA":[30,65,94],"methods":[31,59],"typically":[32,60],"use":[33],"rule-based":[34],"output":[35],"rewards":[36],"verify":[38],"the":[39,48,70,91,101,123,131,177],"model's":[40],"rollouts":[41],"but":[42],"provide":[43],"no":[44],"reward":[45,119,140],"supervision":[46],"\"think\u201d":[49,167],"process,":[50],"leaving":[51],"its":[52,80],"correctness":[53],"and":[54,111,116,158],"efficacy":[55],"uncontrolled.":[56],"Furthermore,":[57],"these":[58,87],"fine-tune":[61],"directly":[62],"on":[63,155,176],"downstream":[64],"tasks":[66],"without":[67],"explicitly":[68],"enhancing":[69],"model\u2019s":[71,124],"native":[72],"visual":[74,125],"perception,":[76],"which":[77],"may":[78],"constrain":[79],"performance":[81,154],"upper":[82],"bound.":[83],"In":[84,97,128],"response":[85],"gaps,":[88],"we":[89,99,135],"propose":[90],"multi\u2010stage":[92],"framework":[95],"(Refine-IQA).":[96],"Stage-1,":[98],"build":[100],"Refine-Perception-20K":[102],"dataset":[103],"(with":[104],"12":[105],"main":[106],"distortions,":[107],"20,907":[108],"locally-distorted":[109],"images,":[110],"over":[112],"55K":[113],"samples)":[115],"design":[117],"multi-task":[118],"functions":[120],"strengthen":[122],"perception.":[127],"Stage-2,":[129],"targeting":[130],"scoring":[133,159],"task,":[134],"introduce":[136],"probability":[138],"difference":[139],"involved":[141],"strategy":[142],"\"think\"":[144],"process":[145],"supervision.":[146],"The":[147],"resulting":[148],"Refine-IQA":[149],"Series":[150],"Models":[151],"achieve":[152],"outstanding":[153],"both":[156],"perception":[157],"tasks\u2014and,":[160],"notably,":[161],"our":[162],"activates":[164],"robust":[166],"(quality":[168],"interpretating)":[169],"capability":[170],"that":[171],"also":[172],"attains":[173],"exceptional":[174],"results":[175],"corresponding":[178],"interpreting":[180],"benchmark.":[181]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-18T00:00:00"}
