{"id":"https://openalex.org/W7140323215","doi":"https://doi.org/10.48550/arxiv.2603.22872","title":"ForeSea: AI Forensic Search with Multi-modal Queries for Video Surveillance","display_name":"ForeSea: AI Forensic Search with Multi-modal Queries for Video Surveillance","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7140323215","doi":"https://doi.org/10.48550/arxiv.2603.22872"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.22872","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22872","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.22872","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130629586","display_name":"Hyojin Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Park, Hyojin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130603160","display_name":"Yi Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130573358","display_name":"Janghoon Cho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cho, Janghoon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060613793","display_name":"Sungha Choi","orcid":"https://orcid.org/0000-0003-2313-9243"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Sungha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130572672","display_name":"Jungsoo Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Jungsoo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130578879","display_name":"Taotao Jing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jing, Taotao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130570633","display_name":"Shuai Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shuai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041437150","display_name":"Munawar Hayat","orcid":"https://orcid.org/0000-0002-2706-5985"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hayat, Munawar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130621721","display_name":"Dashan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Dashan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045723491","display_name":"Ning Bi","orcid":"https://orcid.org/0000-0002-7505-3997"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bi, Ning","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130572205","display_name":"Fatih Porikli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Porikli, Fatih","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5130629586"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3774999976158142,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.3774999976158142,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.149399995803833,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.10949999839067459,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7886999845504761},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6057000160217285},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5130000114440918},{"id":"https://openalex.org/keywords/clips","display_name":"CLIPS","score":0.36910000443458557},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.3677000105381012},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.35179999470710754}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8557999730110168},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7886999845504761},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6057000160217285},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5271999835968018},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5130000114440918},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4715999960899353},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.397599995136261},{"id":"https://openalex.org/C2778739407","wikidata":"https://www.wikidata.org/wiki/Q165372","display_name":"CLIPS","level":2,"score":0.36910000443458557},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.3677000105381012},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.35179999470710754},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34299999475479126},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.2906000018119812},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C154586513","wikidata":"https://www.wikidata.org/wiki/Q4420972","display_name":"Tracking system","level":3,"score":0.26910001039505005},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2574999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.22872","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22872","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.22872","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22872","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"decades":[1],"of":[2,98,104,116],"work,":[3],"surveillance":[4,106],"still":[5],"struggles":[6],"to":[7,67,129,178,208,225],"find":[8],"specific":[9],"targets":[10],"across":[11],"long,":[12],"multi-camera":[13],"video.":[14],"Prior":[15],"methods":[16],"--":[17,25],"tracking":[18,147],"pipelines,":[19],"CLIP":[20],"based":[21],"models,":[22],"and":[23,34,95,120,161,181,191,217],"VideoRAG":[24,198,222],"require":[26],"heavy":[27],"manual":[28],"filtering,":[29],"capture":[30],"only":[31],"shallow":[32],"attributes,":[33],"fail":[35],"at":[36],"temporal":[37,118,192,215],"reasoning.":[38],"Real-world":[39],"searches":[40],"are":[41,63],"inherently":[42],"multimodal":[43,75,111,121,154,211],"(e.g.,":[44],"\"When":[45],"does":[46],"this":[47,57,79,130,228],"person":[48],"join":[49],"the":[50,53,158,165,205,220],"fight?\"":[51],"with":[52,74,92,109,140,213],"person's":[54],"image),":[55],"yet":[56],"setting":[58,70],"remains":[59],"underexplored.":[60],"Also,":[61],"there":[62],"no":[64],"proper":[65],"benchmarks":[66],"evaluate":[68],"those":[69],"-":[71],"asking":[72],"video":[73,90],"queries.":[76],"To":[77,200],"address":[78],"gap,":[80],"we":[81,132],"introduce":[82],"ForeSeaQA,":[83,185],"a":[84,141,153,172],"new":[85],"benchmark":[86,207],"specifically":[87],"designed":[88],"for":[89,171],"QA":[91],"image-and-text":[93],"queries":[94,180,212],"timestamped":[96],"annotations":[97],"key":[99],"events.":[100,183],"The":[101],"dataset":[102],"consists":[103],"long-horizon":[105],"footage":[107],"paired":[108],"diverse":[110],"questions,":[112],"enabling":[113],"systematic":[114],"evaluation":[115],"retrieval,":[117],"grounding,":[119,216],"reasoning":[122],"in":[123,227],"realistic":[124],"forensic":[125,137],"conditions.":[126],"Not":[127],"limited":[128],"benchmark,":[131],"propose":[133],"ForeSea,":[134],"an":[135],"AI":[136],"search":[138],"system":[139,166,223],"3-stage,":[142],"plug-and-play":[143],"pipeline.":[144],"(1)":[145],"A":[146],"module":[148,156],"filters":[149],"irrelevant":[150],"footage;":[151],"(2)":[152],"embedding":[155],"indexes":[157],"remaining":[159],"clips;":[160],"(3)":[162],"during":[163],"inference,":[164],"retrieves":[167],"top-K":[168],"candidate":[169],"clips":[170],"Video":[173],"Large":[174],"Language":[175],"Model":[176],"(VideoLLM)":[177],"answer":[179],"localize":[182],"On":[184],"ForeSea":[186,218],"improves":[187],"accuracy":[188],"by":[189,194],"3.5%":[190],"IoU":[193],"11.0":[195],"over":[196],"prior":[197],"models.":[199],"our":[201],"knowledge,":[202],"ForeSeaQA":[203],"is":[204,219],"first":[206,221],"support":[209],"complex":[210],"precise":[214],"built":[224],"excel":[226],"setting.":[229]},"counts_by_year":[],"updated_date":"2026-05-04T08:30:34.212998","created_date":"2026-03-26T00:00:00"}
