{"id":"https://openalex.org/W7139027575","doi":"https://doi.org/10.48550/arxiv.2603.17920","title":"SegFly: A 2D-3D-2D Paradigm for Aerial RGB-Thermal Semantic Segmentation at Scale","display_name":"SegFly: A 2D-3D-2D Paradigm for Aerial RGB-Thermal Semantic Segmentation at Scale","publication_year":2026,"publication_date":"2026-03-18","ids":{"openalex":"https://openalex.org/W7139027575","doi":"https://doi.org/10.48550/arxiv.2603.17920"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.17920","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17920","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.17920","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129798444","display_name":"Markus Gross","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gross, Markus","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130202982","display_name":"Sai Bharadhwaj Matha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matha, Sai Bharadhwaj","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130125659","display_name":"Rui Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Rui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129899854","display_name":"Viswanathan Muthuveerappan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muthuveerappan, Viswanathan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129915341","display_name":"Conrad Christoph","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Christoph, Conrad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129896386","display_name":"Julius Huber","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huber, Julius","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130134780","display_name":"Daniel Cremers","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cremers, Daniel","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129798444"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.3366999924182892,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.3366999924182892,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.2930999994277954,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.09369999915361404,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.7674000263214111},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.6607000231742859},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6320000290870667},{"id":"https://openalex.org/keywords/aerial-image","display_name":"Aerial image","score":0.49630001187324524},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.4925000071525574},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4666999876499176},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4636000096797943},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.4302999973297119}],"concepts":[{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.7674000263214111},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7537999749183655},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.717199981212616},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6633999943733215},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.6607000231742859},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6320000290870667},{"id":"https://openalex.org/C2776429412","wikidata":"https://www.wikidata.org/wiki/Q4688011","display_name":"Aerial image","level":3,"score":0.49630001187324524},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.4925000071525574},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4666999876499176},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4636000096797943},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.4302999973297119},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3961000144481659},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.36469998955726624},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.36309999227523804},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.29109999537467957},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.28029999136924744},{"id":"https://openalex.org/C2961294","wikidata":"https://www.wikidata.org/wiki/Q166863","display_name":"Color space","level":3,"score":0.27489998936653137},{"id":"https://openalex.org/C195958017","wikidata":"https://www.wikidata.org/wiki/Q1675268","display_name":"Iterative closest point","level":3,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.17920","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17920","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.17920","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17920","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7890539765357971,"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Semantic":[0],"segmentation":[1,227],"for":[2,9,222,250],"uncrewed":[3],"aerial":[4,10,62,181],"vehicles":[5],"(UAVs)":[6],"is":[7],"fundamental":[8],"scene":[11,253],"understanding,":[12],"yet":[13],"existing":[14,179],"RGB":[15,75,79,93,122,193,223],"and":[16,24,35,80,101,124,132,171,195,207,213,224,228,234,256],"RGB-T":[17,40,165,201],"datasets":[18],"remain":[19],"limited":[20],"in":[21,60],"scale,":[22],"diversity,":[23],"annotation":[25,134],"efficiency":[26],"due":[27],"to":[28,64,77,147,159,178],"the":[29,36,219,244],"high":[30],"cost":[31],"of":[32,38,72,92,121,126,246],"manual":[33,139],"labeling":[34],"difficulties":[37],"accurate":[39],"alignment":[41,157,166],"on":[42],"off-the-shelf":[43],"UAVs.":[44],"To":[45],"address":[46],"these":[47],"challenges,":[48],"we":[49,183,217],"propose":[50],"a":[51,69,84,96,186],"scalable":[52,251],"geometry-driven":[53,247],"2D-3D-2D":[54,145,248],"paradigm":[55,146],"that":[56,230],"leverages":[57],"multi-view":[58],"redundancy":[59],"high-overlap":[61],"imagery":[63],"automatically":[65,118],"propagate":[66],"labels":[67,123,128],"from":[68,240],"small":[70],"subset":[71],"manually":[73],"annotated":[74],"images":[76,94,194],"both":[78,231],"thermal":[81,127,225],"modalities":[82],"within":[83],"unified":[85],"framework.":[86],"By":[87],"lifting":[88],"less":[89],"than":[90,197],"3%":[91],"into":[95,104],"semantic":[97,226],"3D":[98,152],"point":[99],"cloud":[100],"reprojecting":[102],"it":[103],"all":[105],"views,":[106],"our":[107,176],"approach":[108],"enables":[109],"dense":[110],"pseudo":[111],"ground-truth":[112],"generation":[113],"across":[114,210],"large":[115],"image":[116,149],"collections,":[117],"producing":[119],"97%":[120],"100%":[125],"while":[129],"achieving":[130],"91%":[131],"88%":[133],"accuracy":[135,170],"without":[136],"any":[137],"2D":[138],"refinement.":[140],"We":[141],"further":[142],"extend":[143],"this":[144],"cross-modal":[148],"registration,":[150],"using":[151],"geometry":[153],"as":[154],"an":[155],"intermediate":[156],"space":[158],"obtain":[160],"fully":[161],"automatic,":[162],"strong":[163],"pixel-level":[164],"with":[167,189],"87%":[168],"registration":[169],"no":[172],"hardware-level":[173],"synchronization.":[174],"Applying":[175],"framework":[177],"geo-referenced":[180],"imagery,":[182],"construct":[184],"SegFly,":[185,216],"large-scale":[187],"benchmark":[188],"over":[190],"20,000":[191],"high-resolution":[192],"more":[196],"15,000":[198],"geometrically":[199],"aligned":[200],"pairs":[202],"spanning":[203],"diverse":[204],"urban,":[205],"industrial,":[206],"rural":[208],"environments":[209],"multiple":[211],"altitudes":[212],"seasons.":[214],"On":[215],"establish":[218],"Firefly":[220],"baseline":[221],"show":[229],"conventional":[232],"architectures":[233],"vision":[235],"foundation":[236],"models":[237],"benefit":[238],"substantially":[239],"SegFly":[241],"supervision,":[242],"highlighting":[243],"potential":[245],"pipelines":[249],"multi-modal":[252],"understanding.":[254],"Data":[255],"Code":[257],"available":[258],"at":[259],"https://github.com/markus-42/SegFly.":[260]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
