{"id":"https://openalex.org/W7135167052","doi":"https://doi.org/10.1109/iccp68926.2025.11427117","title":"Open-World Video Panoptic Segmentation for Aerial Perception using Foundation Models","display_name":"Open-World Video Panoptic Segmentation for Aerial Perception using Foundation Models","publication_year":2025,"publication_date":"2025-10-16","ids":{"openalex":"https://openalex.org/W7135167052","doi":"https://doi.org/10.1109/iccp68926.2025.11427117"},"language":null,"primary_location":{"id":"doi:10.1109/iccp68926.2025.11427117","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccp68926.2025.11427117","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 21st International Conference on Intelligent Computer Communication and Processing (ICCP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129072398","display_name":"Natalia-Georgiana Boncea","orcid":null},"institutions":[{"id":"https://openalex.org/I158333966","display_name":"Technical University of Cluj-Napoca","ror":"https://ror.org/03r8nwp71","country_code":"RO","type":"education","lineage":["https://openalex.org/I158333966"]}],"countries":["RO"],"is_corresponding":true,"raw_author_name":"Natalia-Georgiana Boncea","raw_affiliation_strings":["Technical University of Cluj-Napoca,Faculty of Automation and Computer Science,Cluj-Napoca,Romania"],"affiliations":[{"raw_affiliation_string":"Technical University of Cluj-Napoca,Faculty of Automation and Computer Science,Cluj-Napoca,Romania","institution_ids":["https://openalex.org/I158333966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067860606","display_name":"Vivian Chiciudean","orcid":"https://orcid.org/0009-0002-0798-5247"},"institutions":[{"id":"https://openalex.org/I158333966","display_name":"Technical University of Cluj-Napoca","ror":"https://ror.org/03r8nwp71","country_code":"RO","type":"education","lineage":["https://openalex.org/I158333966"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Vivian Chiciudean","raw_affiliation_strings":["Technical University of Cluj-Napoca,Computer Science Department,Cluj-Napoca,Romania"],"affiliations":[{"raw_affiliation_string":"Technical University of Cluj-Napoca,Computer Science Department,Cluj-Napoca,Romania","institution_ids":["https://openalex.org/I158333966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061493057","display_name":"Florin Oniga","orcid":"https://orcid.org/0000-0003-4875-2220"},"institutions":[{"id":"https://openalex.org/I158333966","display_name":"Technical University of Cluj-Napoca","ror":"https://ror.org/03r8nwp71","country_code":"RO","type":"education","lineage":["https://openalex.org/I158333966"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Florin Oniga","raw_affiliation_strings":["Technical University of Cluj-Napoca,Computer Science Department,Cluj-Napoca,Romania"],"affiliations":[{"raw_affiliation_string":"Technical University of Cluj-Napoca,Computer Science Department,Cluj-Napoca,Romania","institution_ids":["https://openalex.org/I158333966"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112803157","display_name":"Sergiu Nedevschi","orcid":null},"institutions":[{"id":"https://openalex.org/I158333966","display_name":"Technical University of Cluj-Napoca","ror":"https://ror.org/03r8nwp71","country_code":"RO","type":"education","lineage":["https://openalex.org/I158333966"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Sergiu Nedevschi","raw_affiliation_strings":["Technical University of Cluj-Napoca,Computer Science Department,Cluj-Napoca,Romania"],"affiliations":[{"raw_affiliation_string":"Technical University of Cluj-Napoca,Computer Science Department,Cluj-Napoca,Romania","institution_ids":["https://openalex.org/I158333966"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5129072398"],"corresponding_institution_ids":["https://openalex.org/I158333966"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.76725122,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.133200004696846,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.133200004696846,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.1316000074148178,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.09679999947547913,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.554099977016449},{"id":"https://openalex.org/keywords/panopticon","display_name":"Panopticon","score":0.5145999789237976},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.43070000410079956},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4138999879360199},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.33559998869895935},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.33079999685287476}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6456999778747559},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6259999871253967},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.554099977016449},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5482000112533569},{"id":"https://openalex.org/C138569888","wikidata":"https://www.wikidata.org/wiki/Q828310","display_name":"Panopticon","level":3,"score":0.5145999789237976},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.43070000410079956},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4138999879360199},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.33559998869895935},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.33079999685287476},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.31859999895095825},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2669999897480011},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.26570001244544983},{"id":"https://openalex.org/C59519942","wikidata":"https://www.wikidata.org/wiki/Q650665","display_name":"Drone","level":2,"score":0.25929999351501465},{"id":"https://openalex.org/C133214962","wikidata":"https://www.wikidata.org/wiki/Q191839","display_name":"Aerial photography","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccp68926.2025.11427117","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccp68926.2025.11427117","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 21st International Conference on Intelligent Computer Communication and Processing (ICCP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2113350594","https://openalex.org/W2340897893","https://openalex.org/W2910628332","https://openalex.org/W2965182628","https://openalex.org/W2999219213","https://openalex.org/W3028752951","https://openalex.org/W3035549667","https://openalex.org/W3176027594","https://openalex.org/W4286982960","https://openalex.org/W4307411363","https://openalex.org/W4310557340","https://openalex.org/W4312868348","https://openalex.org/W4386075819","https://openalex.org/W4386850012","https://openalex.org/W4390874575","https://openalex.org/W4390874670","https://openalex.org/W4399220169","https://openalex.org/W4399496596","https://openalex.org/W4403193080","https://openalex.org/W4404612908"],"related_works":[],"abstract_inverted_index":{"Accurate":[0],"scene":[1,193],"understanding":[2],"from":[3,231],"low-altitude":[4],"aerial":[5,53,156],"video":[6,47,100],"cameras":[7],"is":[8,109],"a":[9,165],"prerequisite":[10],"for":[11,52],"autonomous":[12],"urban":[13],"drones.":[14],"However,":[15],"current":[16],"methods":[17],"are":[18],"confined":[19],"to":[20,41,137,147,213],"closed":[21],"taxonomies":[22],"and":[23,32,60,65,89,96,144,187,243],"struggle":[24],"with":[25,62,102,206,217],"the":[26,44,70,80,116,125,138,142,148,152,176,191,211,224,241],"high":[27,218],"object":[28,104,178,219],"density,":[29],"scale":[30],"variation,":[31],"viewpoint":[33],"changes":[34],"typical":[35],"of":[36,127,141,154,184,200],"UAV":[37],"footage.":[38],"We":[39],"propose,":[40],"our":[42,77],"knowledge,":[43],"first":[45],"open-world":[46],"panoptic":[48],"segmentation":[49,95,183,205,242],"pipeline":[50,78,225],"tailored":[51],"perception,":[54],"driven":[55],"primarily":[56],"by":[57,115],"foundation":[58],"models":[59],"enhanced":[61],"domain-specific":[63],"pre-":[64],"post-processing":[66],"filters.":[67],"Built":[68],"around":[69],"Grounded":[71],"Segment":[72],"Anything":[73],"Model":[74],"(Grounded":[75],"SAM),":[76],"leverages":[79],"Decoupled":[81],"Video":[82],"Segmentation":[83],"(DEVA)":[84],"architecture.":[85],"This":[86],"enables":[87],"scalable":[88],"memory-efficient":[90],"temporal":[91],"propagation,":[92],"supporting":[93],"robust":[94],"tracking":[97,244],"over":[98],"long":[99,215],"sequences":[101],"dense":[103],"distributions.":[105],"A":[106],"key":[107],"innovation":[108],"an":[110],"adaptive":[111],"slicing":[112,167],"mechanism":[113],"inspired":[114],"Slicing":[117],"Aided":[118],"Hyper":[119],"Inference":[120],"(SAHI)":[121],"model,":[122],"which":[123,238],"adjusts":[124],"granularity":[126],"windowing":[128],"based":[129],"on":[130,175],"spatial":[131],"location.":[132],"It":[133],"applies":[134],"finer":[135],"slices":[136],"upper":[139],"part":[140],"frame":[143],"coarser":[145],"ones":[146],"lower":[149],"part,":[150],"improving":[151],"detection":[153],"small":[155,186],"objects":[157,189],"without":[158,246],"compromising":[159],"inference":[160],"speed.":[161],"Additionally,":[162],"we":[163],"introduce":[164],"class-aware":[166],"strategy":[168],"that":[169],"dynamically":[170],"resizes":[171],"SAHI":[172],"windows":[173],"depending":[174],"predicted":[177],"class":[179],"size,":[180],"enabling":[181],"effective":[182],"both":[185],"large":[188],"in":[190],"same":[192],"while":[194],"preserving":[195],"open-vocabulary":[196],"generalization.":[197],"The":[198],"use":[199],"Light":[201],"HQ-SAM":[202],"ensures":[203],"efficient":[204],"minimal":[207],"resource":[208],"overhead,":[209],"allowing":[210],"system":[212],"handle":[214],"videos":[216],"density.":[220],"Operating":[221],"fully":[222],"autonomously,":[223],"extracts":[226],"relevant":[227],"textual":[228],"labels":[229],"directly":[230],"input":[232],"frames":[233],"using":[234],"Llama":[235],"3.2":[236],"Vision,":[237],"seamlessly":[239],"guides":[240],"components":[245],"requiring":[247],"manual":[248],"input.":[249]},"counts_by_year":[],"updated_date":"2026-03-15T07:15:06.534987","created_date":"2026-03-14T00:00:00"}
