{"id":"https://openalex.org/W7154406389","doi":"https://doi.org/10.48550/arxiv.2604.10228","title":"SVSR: A Self-Verification and Self-Rectification Paradigm for Multimodal Reasoning","display_name":"SVSR: A Self-Verification and Self-Rectification Paradigm for Multimodal Reasoning","publication_year":2026,"publication_date":"2026-04-11","ids":{"openalex":"https://openalex.org/W7154406389","doi":"https://doi.org/10.48550/arxiv.2604.10228"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10228","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10228","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10228","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129752387","display_name":"Zhe Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Qian, Zhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133621461","display_name":"Nianbing Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Nianbing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133564629","display_name":"Zhonghua Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhonghua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104270736","display_name":"Hebei Li","orcid":"https://orcid.org/0000-0002-7529-6331"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hebei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122369633","display_name":"Zhongxing Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Zhongxing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133582584","display_name":"Yueying Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yueying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133583041","display_name":"Fei Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Fei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133491219","display_name":"Zhuohan Ouyang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ouyang, Zhuohan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020661081","display_name":"Yanbiao Ma","orcid":"https://orcid.org/0000-0002-8472-1475"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Yanbiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5129752387"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8427000045776367,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8427000045776367,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.02290000021457672,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.02160000056028366,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.538100004196167},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5192000269889832},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.5141000151634216},{"id":"https://openalex.org/keywords/non-monotonic-logic","display_name":"Non-monotonic logic","score":0.44209998846054077},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4399000108242035},{"id":"https://openalex.org/keywords/opportunistic-reasoning","display_name":"Opportunistic reasoning","score":0.4140999913215637},{"id":"https://openalex.org/keywords/model-based-reasoning","display_name":"Model-based reasoning","score":0.4000000059604645},{"id":"https://openalex.org/keywords/deductive-reasoning","display_name":"Deductive reasoning","score":0.38690000772476196},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.3747999966144562}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7243000268936157},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6351000070571899},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.538100004196167},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5192000269889832},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.5141000151634216},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.44209998846054077},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4399000108242035},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4146000146865845},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.4140999913215637},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.4000000059604645},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.38690000772476196},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.3747999966144562},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.3725999891757965},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.37220001220703125},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.35269999504089355},{"id":"https://openalex.org/C36964233","wikidata":"https://www.wikidata.org/wiki/Q7920942","display_name":"Verbal reasoning","level":3,"score":0.3456999957561493},{"id":"https://openalex.org/C107848011","wikidata":"https://www.wikidata.org/wiki/Q4680756","display_name":"Adaptive reasoning","level":4,"score":0.34279999136924744},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.3133000135421753},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.2793000042438507},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C183521366","wikidata":"https://www.wikidata.org/wiki/Q7256422","display_name":"Psychology of reasoning","level":4,"score":0.2556999921798706},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10228","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10228","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10228","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10228","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.462053507566452,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Current":[0],"multimodal":[1,52,210],"models":[2],"often":[3],"suffer":[4],"from":[5,76],"shallow":[6],"reasoning,":[7,175],"leading":[8],"to":[9,86,99,137,144,163],"errors":[10],"caused":[11],"by":[12,72,127],"incomplete":[13],"or":[14],"inconsistent":[15],"thought":[16],"processes.":[17],"To":[18],"address":[19],"this":[20,97],"limitation,":[21],"we":[22,65,91,106],"propose":[23],"Self-Verification":[24],"and":[25,35,45,51,83,140,146,159,166,207],"Self-Rectification":[26],"(SVSR),":[27],"a":[28,59,67,108,128],"unified":[29,69],"framework":[30],"that":[31,154],"explicitly":[32],"integrates":[33],"self-verification":[34],"self-rectification":[36],"into":[37],"the":[38,118,135,176,198],"model's":[39],"reasoning":[40,53,74,85,103,124,157,182,191],"pipeline,":[41],"substantially":[42],"improving":[43],"robustness":[44],"reliability":[46],"in":[47],"complex":[48],"visual":[49],"understanding":[50],"tasks.":[54],"SVSR":[55,155,201],"is":[56],"built":[57],"on":[58,96],"novel":[60],"three-stage":[61],"training":[62,119],"paradigm.":[63],"First,":[64],"construct":[66],"high-quality":[68],"preference":[70],"dataset":[71,98],"refining":[73],"traces":[75,125,192],"pre-trained":[77],"vision-language":[78],"models,":[79],"incorporating":[80],"both":[81],"forward":[82],"backward":[84],"embed":[87],"self-reflective":[88,174],"signals.":[89],"Second,":[90],"perform":[92],"cold-start":[93],"supervised":[94],"fine-tuning":[95],"learn":[100],"structured,":[101],"multi-step":[102],"behaviors.":[104],"Third,":[105],"apply":[107],"Semi-online":[109],"Direct":[110],"Preference":[111],"Optimization":[112],"(Semi-online":[113],"DPO)":[114],"process,":[115],"continuously":[116],"augmenting":[117],"corpus":[120],"with":[121,172],"high-quality,":[122],"model-generated":[123],"filtered":[126],"powerful":[129],"teacher":[130],"VLM.":[131],"This":[132],"pipeline":[133],"enables":[134,160],"model":[136,177],"learn,":[138],"elicit,":[139],"refine":[141],"its":[142],"ability":[143],"self-verify":[145],"self-rectify.":[147],"Extensive":[148],"experiments":[149],"across":[150],"diverse":[151],"benchmarks":[152],"demonstrate":[153],"improves":[156],"accuracy":[158],"stronger":[161],"generalization":[162],"unseen":[164],"tasks":[165],"question":[167],"types.":[168],"Notably,":[169],"once":[170],"trained":[171],"explicit":[173,190],"also":[178],"exhibits":[179],"improved":[180],"implicit":[181],"ability,":[183],"outperforming":[184],"strong":[185],"baselines":[186],"even":[187],"when":[188],"no":[189],"are":[193],"provided.":[194],"These":[195],"results":[196],"highlight":[197],"potential":[199],"of":[200],"for":[202],"building":[203],"more":[204],"dependable,":[205],"introspective,":[206],"cognitively":[208],"aligned":[209],"systems.":[211]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-15T00:00:00"}
