{"id":"https://openalex.org/W7158691682","doi":"https://doi.org/10.48550/arxiv.2604.26893","title":"Graph-based Semantic Calibration Network for Unaligned UAV RGBT Image Semantic Segmentation and A Large-scale Benchmark","display_name":"Graph-based Semantic Calibration Network for Unaligned UAV RGBT Image Semantic Segmentation and A Large-scale Benchmark","publication_year":2026,"publication_date":"2026-04-29","ids":{"openalex":"https://openalex.org/W7158691682","doi":"https://doi.org/10.48550/arxiv.2604.26893"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.26893","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26893","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.26893","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5098642961","display_name":"Fangqiang Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Fangqiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134876950","display_name":"Zhicheng Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Zhicheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134904923","display_name":"Xiaoliang Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Xiaoliang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134903103","display_name":"Chenglong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Chenglong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134900117","display_name":"Jin Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Jin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8098999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8098999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.04820000007748604,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.008899999782443047,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5672000050544739},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5077999830245972},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.43700000643730164},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.4336000084877014},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4049000144004822},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39590001106262207},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.37779998779296875},{"id":"https://openalex.org/keywords/semantic-mapping","display_name":"Semantic mapping","score":0.3774999976158142},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.3756999969482422}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7738000154495239},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.714900016784668},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5672000050544739},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5612999796867371},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5077999830245972},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.43700000643730164},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.4336000084877014},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4049000144004822},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39590001106262207},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.37779998779296875},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.3774999976158142},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3756999969482422},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.37209999561309814},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.36739999055862427},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.36309999227523804},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.2863999903202057},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C205372480","wikidata":"https://www.wikidata.org/wiki/Q210521","display_name":"Image resolution","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.27239999175071716},{"id":"https://openalex.org/C33724603","wikidata":"https://www.wikidata.org/wiki/Q812540","display_name":"Bayesian network","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C2776429412","wikidata":"https://www.wikidata.org/wiki/Q4688011","display_name":"Aerial image","level":3,"score":0.2596000134944916},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.2542000114917755}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.26893","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26893","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.26893","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.26893","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Fine-grained":[0,156],"RGBT":[1,17,62,174],"image":[2,18,63,175,181],"semantic":[3,19,37,64,176,185],"segmentation":[4,20],"is":[5,210],"crucial":[6],"for":[7,59,171],"all-weather":[8],"unmanned":[9],"aerial":[10,45],"vehicle":[11],"(UAV)":[12],"scene":[13],"understanding.":[14],"However,":[15],"UAV":[16,61,126,173],"faces":[21],"two":[22],"coupled":[23],"challenges:":[24],"cross-modal":[25,189],"spatial":[26,97],"misalignment":[27],"caused":[28],"by":[29],"sensor":[30],"parallax":[31],"and":[32,35,72,83,87,119,133,146,167],"platform":[33],"vibration,":[34],"severe":[36],"confusion":[38],"among":[39,122],"fine-grained":[40,169,206],"ground":[41],"objects":[42],"under":[43],"top-down":[44],"views.":[46],"To":[47],"address":[48],"these":[49,135],"issues,":[50],"we":[51,67,105,151],"propose":[52,106],"a":[53,69,107,129],"Graph-based":[54],"Semantic":[55,108],"Calibration":[56,110],"Network":[57],"(GSCNet)":[58],"unaligned":[60,172],"segmentation.":[65],"Specifically,":[66],"design":[68],"Feature":[70],"Decoupling":[71],"Alignment":[73],"Module":[74,111],"(FDAM)":[75],"that":[76,113,196],"decouples":[77],"each":[78],"modality":[79,101],"into":[80,128,137],"shared":[81,93],"structural":[82],"private":[84],"perceptual":[85],"components":[86],"performs":[88],"deformable":[89],"alignment":[90],"in":[91,125],"the":[92,116,153,160,165],"subspace,":[94],"enabling":[95],"robust":[96],"correction":[98],"with":[99,187,202],"reduced":[100],"appearance":[102],"interference.":[103],"Moreover,":[104],"Graph":[109],"(SGCM)":[112],"explicitly":[114],"encodes":[115],"hierarchical":[117],"taxonomy":[118],"co-occurrence":[120],"regularities":[121],"ground-object":[123],"categories":[124,186],"scenes":[127],"structured":[130],"category":[131],"graph,":[132],"incorporates":[134],"priors":[136],"graph-attention":[138],"reasoning":[139],"to":[140,159],"calibrate":[141],"predictions":[142],"of":[143,162],"visually":[144],"similar":[145],"rare":[147],"categories.":[148,207],"In":[149],"addition,":[150],"construct":[152],"Unaligned":[154],"RGB-Thermal":[155],"(URTF)":[157],"benchmark,":[158],"best":[161],"our":[163],"knowledge,":[164],"largest":[166],"most":[168],"benchmark":[170],"segmentation,":[177],"containing":[178],"over":[179],"25,000":[180],"pairs":[182],"across":[183],"61":[184],"realistic":[188],"misalignment.":[190],"Extensive":[191],"experiments":[192],"on":[193,205],"URTF":[194],"demonstrate":[195],"GSCNet":[197],"significantly":[198],"outperforms":[199],"state-of-the-art":[200],"methods,":[201],"notable":[203],"gains":[204],"The":[208],"dataset":[209],"available":[211],"at":[212],"https://github.com/mmic-lcl/Datasets-and-benchmark-code.":[213]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-01T00:00:00"}
