{"id":"https://openalex.org/W4410637073","doi":"https://doi.org/10.1145/3701716.3715283","title":"Explainable Manipulated Videos Detection Using Multimodal Large Language Models","display_name":"Explainable Manipulated Videos Detection Using Multimodal Large Language Models","publication_year":2025,"publication_date":"2025-05-08","ids":{"openalex":"https://openalex.org/W4410637073","doi":"https://doi.org/10.1145/3701716.3715283"},"language":"en","primary_location":{"id":"doi:10.1145/3701716.3715283","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3715283","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715283","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715283","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071501264","display_name":"Khoa-Dang Tran","orcid":"https://orcid.org/0000-0002-9385-6101"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Khoa-Dang Tran","raw_affiliation_strings":["University of New South Wales, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"University of New South Wales, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5071501264"],"corresponding_institution_ids":["https://openalex.org/I31746571"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.10004336,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"725","last_page":"728"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7127012014389038},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4612983167171478},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.425004780292511},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.35820311307907104}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7127012014389038},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4612983167171478},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.425004780292511},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.35820311307907104}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3701716.3715283","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3715283","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715283","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3701716.3715283","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3715283","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715283","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4410637073.pdf","grobid_xml":"https://content.openalex.org/works/W4410637073.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W2982058372","https://openalex.org/W4200097352","https://openalex.org/W4205602376","https://openalex.org/W4319336387","https://openalex.org/W4323657833","https://openalex.org/W4388153402","https://openalex.org/W4389474714","https://openalex.org/W4389752793","https://openalex.org/W4390037285","https://openalex.org/W4390190315","https://openalex.org/W4396758671","https://openalex.org/W4401213616","https://openalex.org/W4402670089","https://openalex.org/W4402727075","https://openalex.org/W4403791383"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Misinformation":[0],"detection":[1],"is":[2,139],"receiving":[3],"significant":[4],"attention":[5],"due":[6],"to":[7,44,128,140,149],"their":[8],"impact":[9],"on":[10,19,36,54,91],"modern":[11],"social":[12,61],"stability.":[13],"Extensive":[14],"research":[15,158],"has":[16],"been":[17],"conducted":[18],"multimodal":[20,92],"misinformation":[21],"detection,":[22],"employing":[23],"either":[24],"deep":[25],"learning":[26],"approaches":[27],"or":[28,78],"large":[29,93],"language":[30,94],"models":[31,95],"(LLMs).":[32],"However,":[33],"existing":[34,51],"literature":[35],"explainable":[37,98],"LLM-based":[38],"methods":[39],"remained":[40],"unexplored":[41],"when":[42],"applied":[43,148],"video":[45],"data.":[46],"Furthermore,":[47],"most":[48],"of":[49,68],"the":[50,134,145,150],"datasets":[52],"focus":[53],"facial":[55],"manipulation":[56],"and":[57,75,83,111,122,153,168],"human":[58],"speech,":[59],"whereas":[60],"media":[62],"content":[63],"encompasses":[64],"a":[65,88,123],"wide":[66],"range":[67],"manipulations":[69],"such":[70],"as":[71],"altering":[72],"background":[73],"context":[74],"color,":[76],"adding":[77],"removing":[79],"entities,":[80],"editing":[81],"audio":[82],"text.":[84],"This":[85,157],"paper":[86],"proposed":[87],"framework":[89,103],"based":[90],"(MLLMs)":[96],"for":[97,108,132,172],"manipulated":[99,142],"videos":[100],"detection.":[101],"The":[102,137],"involves":[104],"fine-tuning":[105,133],"two":[106],"MLLMs":[107],"rationales":[109],"generation":[110],"making":[112],"final":[113],"predictions,":[114],"respectively.":[115],"In":[116],"addition,":[117],"pre-trained":[118],"encoders,":[119],"cross-attention":[120],"mechanisms,":[121],"fusion":[124],"gate":[125],"are":[126],"utilized":[127],"extract":[129],"deeper":[130],"features":[131],"second":[135],"MLLM.":[136],"goal":[138],"predict":[141],"content,":[143],"identify":[144],"specific":[146],"attack":[147],"input":[151],"video,":[152],"provide":[154],"detailed":[155],"explanations.":[156],"discusses":[159],"preliminary":[160],"findings":[161],"in":[162],"literature,":[163],"presents":[164],"initial":[165],"experimental":[166],"results,":[167],"outlines":[169],"key":[170],"directions":[171],"future":[173],"work.":[174]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
