{"id":"https://openalex.org/W7126095447","doi":"https://doi.org/10.48550/arxiv.2601.20433","title":"MARE: Multimodal Alignment and Reinforcement for Explainable Deepfake Detection via Vision-Language Models","display_name":"MARE: Multimodal Alignment and Reinforcement for Explainable Deepfake Detection via Vision-Language Models","publication_year":2026,"publication_date":"2026-01-28","ids":{"openalex":"https://openalex.org/W7126095447","doi":"https://doi.org/10.48550/arxiv.2601.20433"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2601.20433","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124271110","display_name":"Wenbo Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Wenbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124283136","display_name":"Wei Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124268476","display_name":"Xiangyang Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Xiangyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124256488","display_name":"Jiantao Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jiantao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5124271110"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8109999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8109999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.02500000037252903,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.02419999986886978,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7807999849319458},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.6028000116348267},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5353000164031982},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.365200012922287},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.3506999909877777}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019000291824341},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7807999849319458},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.604200005531311},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.6028000116348267},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5353000164031982},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47130000591278076},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.365200012922287},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3506999909877777},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2897999882698059},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.23690000176429749}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2601.20433","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2601.20433","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.20433","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2601.20433","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deepfake":[0,39,52,72],"detection":[1,53,73,123],"is":[2,8],"a":[3,106],"widely":[4],"researched":[5],"topic":[6],"that":[7,98,143],"crucial":[9],"for":[10,50],"combating":[11],"the":[12,22,63,91,130],"spread":[13],"of":[14,67,93,150],"malicious":[15],"content,":[16],"with":[17],"existing":[18],"methods":[19],"mainly":[20],"modeling":[21],"problem":[23],"as":[24],"classification":[25],"or":[26],"spatial":[27],"localization.":[28],"The":[29],"rapid":[30],"advancements":[31],"in":[32,71,148],"generative":[33],"models":[34],"impose":[35],"new":[36],"demands":[37],"on":[38,129],"detection.":[40],"In":[41],"this":[42],"paper,":[43],"we":[44],"propose":[45],"multimodal":[46],"alignment":[47],"and":[48,65,74,138,152],"reinforcement":[49,83],"explainable":[51],"via":[54],"vision-language":[55],"models,":[56],"termed":[57],"MARE,":[58],"which":[59],"aims":[60],"to":[61,89,100,110],"enhance":[62],"accuracy":[64,151],"reliability":[66],"Vision-Language":[68],"Models":[69],"(VLMs)":[70],"reasoning.":[75],"Specifically,":[76],"MARE":[77,104,144],"designs":[78],"comprehensive":[79],"reward":[80],"functions,":[81],"incorporating":[82],"learning":[84],"from":[85,115],"human":[86,101],"feedback":[87],"(RLHF),":[88],"incentivize":[90],"generation":[92],"text-spatially":[94],"aligned":[95],"reasoning":[96,131],"content":[97,132],"adheres":[99],"preferences.":[102],"Besides,":[103],"introduces":[105],"forgery":[107,113],"disentanglement":[108],"module":[109],"capture":[111],"intrinsic":[112],"traces":[114],"high-level":[116],"facial":[117],"semantics,":[118],"thereby":[119],"improving":[120],"its":[121],"authenticity":[122],"capability.":[124],"We":[125],"conduct":[126],"thorough":[127],"evaluations":[128],"generated":[133],"by":[134],"MARE.":[135],"Both":[136],"quantitative":[137],"qualitative":[139],"experimental":[140],"results":[141],"demonstrate":[142],"achieves":[145],"state-of-the-art":[146],"performance":[147],"terms":[149],"reliability.":[153]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-01-30T00:00:00"}
