{"id":"https://openalex.org/W7138018326","doi":"https://doi.org/10.1609/aaai.v40i4.37248","title":"VQAThinker: Exploring Generalizable and Explainable Video Quality Assessment via Reinforcement Learning","display_name":"VQAThinker: Exploring Generalizable and Explainable Video Quality Assessment via Reinforcement Learning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138018326","doi":"https://doi.org/10.1609/aaai.v40i4.37248"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i4.37248","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37248","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i4.37248","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Linhan Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Linhan Cao","raw_affiliation_strings":["Shanghai Jiaotong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiaotong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wei Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Sun","raw_affiliation_strings":["East China Normal University"],"affiliations":[{"raw_affiliation_string":"East China Normal University","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Weixia Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weixia Zhang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiangyang Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyang Zhu","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jun Jia","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Jia","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kaiwei Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiwei Zhang","raw_affiliation_strings":["Shanghai Jiaotong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiaotong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Dandan Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dandan Zhu","raw_affiliation_strings":["East China Normal University"],"affiliations":[{"raw_affiliation_string":"East China Normal University","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Guangtao Zhai","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangtao Zhai","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xiongkuo Min","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiongkuo Min","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30223881,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"4","first_page":"2607","last_page":"2615"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.0015999999595806003,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.0003000000142492354,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6696000099182129},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6183000206947327},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5967000126838684},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5543000102043152},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.5501999855041504},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.5450999736785889},{"id":"https://openalex.org/keywords/video-quality","display_name":"Video quality","score":0.5335999727249146}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7567999958992004},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6696000099182129},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6204000115394592},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6183000206947327},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6097999811172485},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5967000126838684},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5543000102043152},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.5501999855041504},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.5450999736785889},{"id":"https://openalex.org/C103910844","wikidata":"https://www.wikidata.org/wiki/Q2631256","display_name":"Video quality","level":3,"score":0.5335999727249146},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4410000145435333},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.3919000029563904},{"id":"https://openalex.org/C114227958","wikidata":"https://www.wikidata.org/wiki/Q7631422","display_name":"Subjective video quality","level":4,"score":0.30979999899864197},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2777000069618225},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.2773999869823456},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.2696000039577484}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i4.37248","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37248","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i4.37248","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37248","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.466987669467926,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Video":[0],"quality":[1,9,69,95,140,183,189,199],"assessment":[2],"(VQA)":[3],"aims":[4],"to":[5,31,65,135,154,202],"objectively":[6],"quantify":[7],"perceptual":[8,75],"degradation":[10],"in":[11,42,195],"alignment":[12],"with":[13,62,227],"human":[14,74],"visual":[15],"perception.":[16],"Despite":[17],"recent":[18],"advances,":[19],"existing":[20,203],"VQA":[21,54,176,205,224],"models":[22,60,206,225],"still":[23],"suffer":[24],"from":[25],"two":[26],"critical":[27],"limitations:":[28],"poor":[29],"generalization":[30,180],"out-of-distribution":[32],"(OOD)":[33],"videos":[34,158],"and":[35,71,99,117,144,174,198,207,222],"limited":[36],"explainability,":[37],"which":[38],"restrict":[39],"their":[40,160],"applicability":[41],"real-world":[43],"scenarios.":[44],"To":[45],"address":[46],"these":[47],"challenges,":[48],"we":[49,78],"propose":[50],"VQAThinker,":[51],"a":[52,85,105,127,146],"reasoning-based":[53],"framework":[55],"that":[56,90,109,131,150,166,212],"leverages":[57],"large":[58],"multimodal":[59],"(LMMs)":[61],"reinforcement":[63,87,213],"learning":[64,88,214],"jointly":[66],"model":[67,134,153],"video":[68,94,142,182,188],"understanding":[70,190],"scoring,":[72],"emulating":[73],"decision-making.":[76],"Specifically,":[77],"adopt":[79],"group":[80],"relative":[81,139],"policy":[82],"optimization":[83],"(GRPO),":[84],"rule-guided":[86],"algorithm":[89],"enables":[91],"reasoning":[92],"over":[93,159],"under":[96],"score-level":[97,228],"supervision,":[98],"introduce":[100],"three":[101],"VQA-specific":[102],"rewards:":[103],"(1)":[104],"bell-shaped":[106],"regression":[107],"reward":[108,130,149],"increases":[110],"rapidly":[111],"as":[112],"the":[113,123,133,138,152],"prediction":[114],"error":[115],"decreases":[116],"becomes":[118],"progressively":[119],"less":[120],"sensitive":[121],"near":[122],"ground":[124],"truth;":[125],"(2)":[126],"pairwise":[128],"ranking":[129],"guides":[132],"correctly":[136],"determine":[137],"between":[141],"pairs;":[143],"(3)":[145],"temporal":[147],"consistency":[148],"encourages":[151],"prefer":[155],"temporally":[156],"coherent":[157],"perturbed":[161],"counterparts.":[162],"Extensive":[163],"experiments":[164],"demonstrate":[165,211],"VQAThinker":[167],"achieves":[168],"state-of-the-art":[169],"performance":[170],"on":[171,187],"both":[172],"in-domain":[173],"OOD":[175],"benchmarks,":[177],"showing":[178],"strong":[179],"for":[181],"scoring.":[184],"Furthermore,":[185],"evaluations":[186],"tasks":[191],"validate":[192],"its":[193],"superiority":[194],"distortion":[196],"attribution":[197],"description":[200],"compared":[201],"explainable":[204,223],"LMMs.":[208],"These":[209],"findings":[210],"offers":[215],"an":[216],"effective":[217],"pathway":[218],"toward":[219],"building":[220],"generalizable":[221],"solely":[226],"supervision.":[229]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-02-06T00:00:00"}
