{"id":"https://openalex.org/W4415535539","doi":"https://doi.org/10.1145/3746027.3754696","title":"VQA <sup>2</sup> : Visual Question Answering for Video Quality Assessment","display_name":"VQA <sup>2</sup> : Visual Question Answering for Video Quality Assessment","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415535539","doi":"https://doi.org/10.1145/3746027.3754696"},"language":"en","primary_location":{"id":"doi:10.1145/3746027.3754696","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754696","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075029034","display_name":"Ziheng Jia","orcid":"https://orcid.org/0009-0002-2623-4756"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ziheng Jia","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0002-2623-4756","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101797192","display_name":"Zicheng Zhang","orcid":"https://orcid.org/0000-0002-7247-7938"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zicheng Zhang","raw_affiliation_strings":["Shanghai Jiao Tong University, China, China"],"raw_orcid":"https://orcid.org/0000-0002-7247-7938","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, China, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036738962","display_name":"Jiaying Qian","orcid":"https://orcid.org/0000-0002-8783-4942"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaying Qian","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-8783-4942","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014879869","display_name":"Haoning Wu","orcid":"https://orcid.org/0000-0001-8642-8101"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haoning Wu","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0001-8642-8101","affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044700284","display_name":"Wei Sun","orcid":"https://orcid.org/0000-0001-8162-1949"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Sun","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-8162-1949","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002451902","display_name":"Chunyi Li","orcid":"https://orcid.org/0009-0007-0634-1710"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunyi Li","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0007-0634-1710","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087407889","display_name":"Xiaohong Liu","orcid":"https://orcid.org/0000-0001-6377-4730"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohong Liu","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-6377-4730","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100403129","display_name":"Weisi Lin","orcid":"https://orcid.org/0000-0001-9866-1947"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Weisi Lin","raw_affiliation_strings":["Nanyang Technological University, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0001-9866-1947","affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064168853","display_name":"Guangtao Zhai","orcid":"https://orcid.org/0000-0001-8165-9322"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangtao Zhai","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-8165-9322","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043405654","display_name":"Xiongkuo Min","orcid":"https://orcid.org/0000-0001-5693-0416"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiongkuo Min","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-5693-0416","affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5075029034"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":4.533,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.95212137,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"6751","last_page":"6760"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.6967999935150146},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.6899999976158142},{"id":"https://openalex.org/keywords/video-quality","display_name":"Video quality","score":0.6136999726295471},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.42559999227523804},{"id":"https://openalex.org/keywords/subjective-video-quality","display_name":"Subjective video quality","score":0.42160001397132874},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.42010000348091125},{"id":"https://openalex.org/keywords/quality-assessment","display_name":"Quality assessment","score":0.41909998655319214},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3887999951839447}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7832000255584717},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.6967999935150146},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.6899999976158142},{"id":"https://openalex.org/C103910844","wikidata":"https://www.wikidata.org/wiki/Q2631256","display_name":"Video quality","level":3,"score":0.6136999726295471},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.42559999227523804},{"id":"https://openalex.org/C114227958","wikidata":"https://www.wikidata.org/wiki/Q7631422","display_name":"Subjective video quality","level":4,"score":0.42160001397132874},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.42010000348091125},{"id":"https://openalex.org/C3020001037","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assessment","level":3,"score":0.41909998655319214},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.4047999978065491},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3887999951839447},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35850000381469727},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.35749998688697815},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.3522999882698059},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3391999900341034},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.3176000118255615},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C105842133","wikidata":"https://www.wikidata.org/wiki/Q1899679","display_name":"Visual communication","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.2757999897003174},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2709999978542328},{"id":"https://openalex.org/C168820333","wikidata":"https://www.wikidata.org/wiki/Q448889","display_name":"Visual inspection","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.262800008058548},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.257999986410141},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3746027.3754696","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754696","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:dr.ntu.edu.sg:10356/202547","is_oa":false,"landing_page_url":"https://hdl.handle.net/10356/202547","pdf_url":null,"source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8830604195","display_name":null,"funder_award_id":"62271312","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2602023803","https://openalex.org/W4387068132","https://openalex.org/W4403601500","https://openalex.org/W4405595839","https://openalex.org/W4413158035"],"related_works":[],"abstract_inverted_index":{"The":[0,145],"advent":[1],"and":[2,125,151,172,175,217,226],"proliferation":[3],"of":[4,122,158],"large":[5],"multi-modal":[6],"models":[7,148,182],"(LMMs)":[8],"have":[9,67],"introduced":[10],"new":[11],"paradigms":[12],"to":[13,154],"computer":[14],"vision,":[15],"transforming":[16],"various":[17,127],"tasks":[18,203],"into":[19],"a":[20,30,215],"unified":[21],"visual":[22,35,57,78,108,150,200],"question":[23,109],"answering":[24,110],"framework.":[25],"Video":[26],"Quality":[27],"Assessment":[28],"(VQA),":[29],"classic":[31],"field":[32],"in":[33,48,63,88,162,186,199,208],"low-level":[34,77,222],"perception,":[36],"focused":[37],"initially":[38],"on":[39,115,168],"quantitative":[40],"video":[41,90,116,128,169,223],"quality":[42,58,79,117,160,170,201,209,224],"scoring.":[43],"However,":[44],"driven":[45],"by":[46],"advances":[47],"LMMs,":[49],"it":[50],"is":[51],"now":[52],"progressing":[53],"toward":[54],"more":[55],"holistic":[56],"understanding":[59,173,202,227],"tasks.":[60,188,211],"Recent":[61],"studies":[62],"the":[64,89,103,141,156,179,193,196],"image":[65],"domain":[66],"demonstrated":[68],"that":[69,113,178],"Visual":[70],"Question":[71],"Answering":[72],"(VQA)":[73],"can":[74],"markedly":[75],"enhance":[76,155],"evaluation.":[80],"Nevertheless,":[81],"related":[82],"work":[83,213],"has":[84],"not":[85],"been":[86],"explored":[87],"domain,":[91],"leaving":[92],"substantial":[93],"room":[94],"for":[95,220],"improvement.":[96],"To":[97],"address":[98],"this":[99,137],"gap,":[100],"we":[101,139],"introduce":[102],"VQA\u00b2":[104,142,146,180],"Instruction":[105],"Dataset-the":[106],"first":[107],"instruction":[111,132],"dataset":[112,120],"focuses":[114],"assessment.":[118],"This":[119],"consists":[121],"3":[123],"subsets":[124],"covers":[126],"types,":[129],"containing":[130],"157,755":[131],"question-answer":[133],"pairs.":[134],"Then,":[135],"leveraging":[136],"foundation,":[138],"present":[140],"series":[143,147,181],"models.":[144],"interleave":[149],"motion":[152],"tokens":[153],"perception":[157],"spatial-temporal":[159],"details":[161],"videos.":[163],"We":[164],"conduct":[165],"extensive":[166],"experiments":[167],"scoring":[171,210],"tasks,":[174],"results":[176],"demonstrate":[177],"achieve":[183],"excellent":[184],"performance":[185],"both":[187],"Notably,":[189],"our":[190],"final":[191],"model,":[192],"VQA\u00b2-Assistant,":[194],"exceeds":[195],"renowned":[197],"GPT-4o":[198],"while":[204],"maintaining":[205],"strong":[206],"competitiveness":[207],"Our":[212],"provides":[214],"foundation":[216],"feasible":[218],"approach":[219],"integrating":[221],"assessment":[225],"with":[228],"LMMs.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-25T00:00:00"}
