{"id":"https://openalex.org/W7150989645","doi":"https://doi.org/10.48550/arxiv.2604.03040","title":"QVAD: A Question-Centric Agentic Framework for Efficient and Training-Free Video Anomaly Detection","display_name":"QVAD: A Question-Centric Agentic Framework for Efficient and Training-Free Video Anomaly Detection","publication_year":2026,"publication_date":"2026-04-03","ids":{"openalex":"https://openalex.org/W7150989645","doi":"https://doi.org/10.48550/arxiv.2604.03040"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03040","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03040","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03040","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133029234","display_name":"Lokman Bekit","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bekit, Lokman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113172624","display_name":"Hamza Karim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karim, Hamza","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133019514","display_name":"Nghia T Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Nghia T","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5098730611","display_name":"Yasin Yilmaz","orcid":"https://orcid.org/0000-0001-9445-5927"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yilmaz, Yasin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9693999886512756,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9693999886512756,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.009100000374019146,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.0032999999821186066,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.6721000075340271},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6241999864578247},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.588699996471405},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.5131999850273132},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5076000094413757},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.3953000009059906},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.3720000088214874},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.35429999232292175},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.3260999917984009}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7943000197410583},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.6721000075340271},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6241999864578247},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.588699996471405},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5586000084877014},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.5131999850273132},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5076000094413757},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4564000070095062},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.3953000009059906},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.3720000088214874},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.35429999232292175},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C136536468","wikidata":"https://www.wikidata.org/wiki/Q1225894","display_name":"Undersampling","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.2842000126838684},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.28279998898506165},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.25609999895095825},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.25110000371932983},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.2506999969482422},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03040","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03040","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03040","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03040","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6417229771614075}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Video":[0],"Anomaly":[1],"Detection":[2],"(VAD)":[3],"is":[4,53],"a":[5,68,77,125],"fundamental":[6],"challenge":[7],"in":[8,51],"computer":[9],"vision,":[10],"particularly":[11],"due":[12],"to":[13,38,94],"the":[14,41,49,60,110,128,140],"open-set":[15],"nature":[16,62],"of":[17,43,63,113,127],"anomalies.":[18],"While":[19],"recent":[20],"training-free":[21],"approaches":[22],"utilizing":[23],"Vision-Language":[24],"Models":[25],"(VLMs)":[26],"have":[27],"shown":[28],"promise,":[29],"they":[30],"typically":[31],"rely":[32],"on":[33,85,119,139,159],"massive,":[34],"resource-intensive":[35],"foundation":[36],"models":[37],"compensate":[39],"for":[40],"ambiguity":[42],"static":[44,61],"prompts.":[45],"We":[46,65,134],"argue":[47],"that":[48,72],"bottleneck":[50],"VAD":[52,156],"not":[54],"necessarily":[55],"model":[56],"capacity,":[57],"but":[58],"rather":[59],"inquiry.":[64],"propose":[66],"QVAD,":[67],"question-centric":[69],"agentic":[70],"framework":[71],"treats":[73],"VLM-LLM":[74],"interaction":[75],"as":[76],"dynamic":[78],"dialogue.":[79],"By":[80],"iteratively":[81],"refining":[82],"queries":[83],"based":[84],"visual":[86],"context,":[87],"our":[88],"LLM":[89],"agent":[90],"guides":[91],"smaller":[92],"VLMs":[93],"produce":[95],"high-fidelity":[96],"captions":[97],"and":[98,122],"precise":[99],"semantic":[100],"reasoning":[101],"without":[102],"parameter":[103],"updates.":[104],"This":[105],"``prompt-updating\"":[106],"mechanism":[107],"effectively":[108],"unlocks":[109],"latent":[111],"capabilities":[112,157],"lightweight":[114],"models,":[115],"enabling":[116],"state-of-the-art":[117],"performance":[118],"UCF-Crime,":[120],"XD-Violence,":[121],"UBNormal":[123],"using":[124],"fraction":[126],"parameters":[129],"required":[130],"by":[131],"competing":[132],"methods.":[133],"further":[135],"demonstrate":[136],"exceptional":[137],"generalizability":[138],"single-scene":[141],"ComplexVAD":[142],"dataset.":[143],"Crucially,":[144],"QVAD":[145],"achieves":[146],"high":[147],"inference":[148],"speeds":[149],"with":[150],"minimal":[151],"memory":[152],"footprints,":[153],"making":[154],"advanced":[155],"deployable":[158],"resource-constrained":[160],"edge":[161],"devices.":[162]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-07T00:00:00"}
