{"id":"https://openalex.org/W4405490806","doi":"https://doi.org/10.1109/iccp63557.2024.10793014","title":"Training-Free Open-Vocabulary Segmentation of Aerial Drone Images Using Foundation Models","display_name":"Training-Free Open-Vocabulary Segmentation of Aerial Drone Images Using Foundation Models","publication_year":2024,"publication_date":"2024-10-17","ids":{"openalex":"https://openalex.org/W4405490806","doi":"https://doi.org/10.1109/iccp63557.2024.10793014"},"language":"en","primary_location":{"id":"doi:10.1109/iccp63557.2024.10793014","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccp63557.2024.10793014","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 20th International Conference on Intelligent Computer Communication and Processing (ICCP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115513802","display_name":"Oana Morar","orcid":null},"institutions":[{"id":"https://openalex.org/I158333966","display_name":"Technical University of Cluj-Napoca","ror":"https://ror.org/03r8nwp71","country_code":"RO","type":"education","lineage":["https://openalex.org/I158333966"]}],"countries":["RO"],"is_corresponding":true,"raw_author_name":"Oana Morar","raw_affiliation_strings":["Technical University of Cluj-Napoca,Romania"],"affiliations":[{"raw_affiliation_string":"Technical University of Cluj-Napoca,Romania","institution_ids":["https://openalex.org/I158333966"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077794784","display_name":"Andra Petrovai","orcid":"https://orcid.org/0000-0002-4036-6336"},"institutions":[{"id":"https://openalex.org/I158333966","display_name":"Technical University of Cluj-Napoca","ror":"https://ror.org/03r8nwp71","country_code":"RO","type":"education","lineage":["https://openalex.org/I158333966"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Andra Petrovai","raw_affiliation_strings":["Technical University of Cluj-Napoca,Romania"],"affiliations":[{"raw_affiliation_string":"Technical University of Cluj-Napoca,Romania","institution_ids":["https://openalex.org/I158333966"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5115513802"],"corresponding_institution_ids":["https://openalex.org/I158333966"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.24066256,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/drone","display_name":"Drone","score":0.8907520771026611},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7063499093055725},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6517820358276367},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.6078959703445435},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6067909002304077},{"id":"https://openalex.org/keywords/aerial-image","display_name":"Aerial image","score":0.6054413914680481},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5797842741012573},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.5244086980819702},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.50113844871521},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4594290256500244},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34222477674484253},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.21413210034370422},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.08418598771095276},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.0641680359840393}],"concepts":[{"id":"https://openalex.org/C59519942","wikidata":"https://www.wikidata.org/wiki/Q650665","display_name":"Drone","level":2,"score":0.8907520771026611},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7063499093055725},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6517820358276367},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.6078959703445435},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6067909002304077},{"id":"https://openalex.org/C2776429412","wikidata":"https://www.wikidata.org/wiki/Q4688011","display_name":"Aerial image","level":3,"score":0.6054413914680481},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5797842741012573},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.5244086980819702},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.50113844871521},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4594290256500244},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34222477674484253},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.21413210034370422},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.08418598771095276},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0641680359840393},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccp63557.2024.10793014","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccp63557.2024.10793014","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 20th International Conference on Intelligent Computer Communication and Processing (ICCP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1516887802","https://openalex.org/W1861492603","https://openalex.org/W1923115158","https://openalex.org/W2035784046","https://openalex.org/W2037227137","https://openalex.org/W2561196672","https://openalex.org/W2612624696","https://openalex.org/W2886934227","https://openalex.org/W2893811450","https://openalex.org/W2981537222","https://openalex.org/W2997806675","https://openalex.org/W3028752951","https://openalex.org/W3034355852","https://openalex.org/W3190334976","https://openalex.org/W4214490042","https://openalex.org/W4283450732","https://openalex.org/W4307411363","https://openalex.org/W4312815172","https://openalex.org/W4313156423","https://openalex.org/W4386075561","https://openalex.org/W4390690473","https://openalex.org/W4390873076","https://openalex.org/W4390874575","https://openalex.org/W4399496596","https://openalex.org/W4402727760","https://openalex.org/W6703848168","https://openalex.org/W6755266879","https://openalex.org/W6758168261","https://openalex.org/W6760613829","https://openalex.org/W6791353385","https://openalex.org/W6811476558","https://openalex.org/W6850787431","https://openalex.org/W6862000706","https://openalex.org/W6866574656"],"related_works":["https://openalex.org/W4229448053","https://openalex.org/W4247925126","https://openalex.org/W4327774218","https://openalex.org/W2059768187","https://openalex.org/W4312858960","https://openalex.org/W4386036939","https://openalex.org/W4379143281","https://openalex.org/W4400597516","https://openalex.org/W1522196789","https://openalex.org/W4281729897"],"abstract_inverted_index":{"Semantic":[0],"and":[1,31,61,148,172,187],"instance":[2,62],"segmentation":[3,63,147],"have":[4],"seen":[5],"sig-nificant":[6],"advancements,":[7],"yet":[8],"their":[9,121],"application":[10,235],"to":[11,152,191,236],"aerial":[12,21,65,131,196],"imagery":[13],"faces":[14],"numerous":[15,29],"challenges.":[16],"The":[17,206],"high":[18,122],"complexity":[19],"of":[20,55,70,155,215],"images":[22,41,159,241],"which":[23],"often":[24],"capture":[25],"extensive":[26,111],"scenes":[27],"with":[28,102],"objects":[30,210],"varying":[32],"object":[33],"scales,":[34],"differ":[35],"considerably":[36],"from":[37,242],"the":[38,43,53,68,108,124,153,156,162,173,184,193,199,227,243],"simpler":[39],"natural":[40],"that":[42,182],"visual":[44,163,174],"deep":[45],"learning":[46],"models":[47,78,95,117,165,186],"are":[48,118,133],"usually":[49],"trained":[50,79],"on.":[51],"Additionally,":[52],"scarcity":[54],"annotated":[56,104,203],"datasets":[57,83],"for":[58,110,120,201],"pixel-level":[59,202],"semantic":[60,216],"in":[64,76],"imagery,":[66],"hinders":[67],"development":[69],"specialized":[71],"systems.":[72],"However,":[73],"re-cent":[74],"progress":[75],"foundational":[77],"on":[80,126,161,195,240],"vast,":[81],"diverse":[82],"has":[84],"improved":[85],"generalization":[86],"capabilities,":[87],"making":[88],"them":[89],"robust":[90],"across":[91],"various":[92],"scenarios.":[93],"These":[94],"can":[96,208],"be":[97],"applied":[98],"directly":[99],"or":[100,218],"adapted":[101,151],"minimal":[103],"data,":[105],"significantly":[106],"reducing":[107],"need":[109,200],"labeled":[112],"datasets.":[113],"Even":[114],"though":[115],"foundation":[116,164,185],"consecrated":[119],"generality,":[123],"results":[125],"specific":[127],"scenarios,":[128],"such":[129],"as":[130,135,137],"images,":[132],"not":[134],"good":[136],"expected.":[138],"In":[139],"this":[140],"paper,":[141],"we":[142],"provide":[143,231],"an":[144,180],"automatic":[145],"panoptic":[146],"annotation":[149],"system":[150],"characteristics":[154],"drone":[157],"vision":[158],"based":[160],"Grounding":[166],"DINO,":[167],"SAM":[168],"(Segment":[169],"Anything":[170],"Model)":[171],"language":[175],"model":[176],"CLIP.":[177],"We":[178,229],"propose":[179],"algorithm":[181,207],"combines":[183],"includes":[188],"processing":[189],"techniques":[190],"improve":[192],"performance":[194],"scenes,":[197],"eliminating":[198],"training":[204],"data.":[205],"segment":[209,219],"using":[211],"a":[212,223,232],"fixed":[213],"number":[214],"classes":[217],"new":[220],"ones":[221],"through":[222],"text":[224],"input":[225],"by":[226],"user.":[228],"also":[230],"demo":[233],"web":[234],"test":[237],"our":[238],"solution":[239],"UAVid":[244],"dataset.":[245]},"counts_by_year":[],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
