{"id":"https://openalex.org/W4414566320","doi":"https://doi.org/10.1109/access.2026.3684906","title":"Visual and Text Prompt Segmentation: A Novel Multi-Model Framework for Remote Sensing","display_name":"Visual and Text Prompt Segmentation: A Novel Multi-Model Framework for Remote Sensing","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W4414566320","doi":"https://doi.org/10.1109/access.2026.3684906"},"language":"en","primary_location":{"id":"doi:10.1109/access.2026.3684906","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3684906","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","datacite","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2026.3684906","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010907570","display_name":"Xing Zi","orcid":"https://orcid.org/0009-0001-4265-2205"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Xing Zi","raw_affiliation_strings":["School of Computer Science, Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, Australia"],"raw_orcid":"https://orcid.org/0009-0001-4265-2205","affiliations":[{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035743795","display_name":"Kairui Jin","orcid":"https://orcid.org/0000-0003-4433-5251"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Kairui Jin","raw_affiliation_strings":["School of Computer Science, Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082921939","display_name":"Xian Tao","orcid":"https://orcid.org/0000-0001-5834-5181"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xian Tao","raw_affiliation_strings":["Chinese Academy of Sciences, Institute of Automation, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences, Institute of Automation, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100635867","display_name":"Jun Li","orcid":"https://orcid.org/0000-0002-1336-2241"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jun Li","raw_affiliation_strings":["School of Computer Science, Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023107987","display_name":"Ali Braytee","orcid":"https://orcid.org/0000-0003-2561-6496"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Ali Braytee","raw_affiliation_strings":["School of Computer Science, Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, Australia"],"raw_orcid":"https://orcid.org/0000-0003-2561-6496","affiliations":[{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079357056","display_name":"Rajiv Ratn Shah","orcid":"https://orcid.org/0000-0003-1028-9373"},"institutions":[{"id":"https://openalex.org/I119939252","display_name":"Indraprastha Institute of Information Technology Delhi","ror":"https://ror.org/03vfp4g33","country_code":"IN","type":"education","lineage":["https://openalex.org/I119939252"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rajiv Ratn Shah","raw_affiliation_strings":["Department of Computer Science and Engineering, Indraprastha Institute of Information Technology, Delhi, India"],"raw_orcid":"https://orcid.org/0000-0003-1028-9373","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Indraprastha Institute of Information Technology, Delhi, India","institution_ids":["https://openalex.org/I119939252"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Karthick Thiyagarajan","orcid":"https://orcid.org/0000-0002-4044-1711"},"institutions":[{"id":"https://openalex.org/I63525965","display_name":"Western Sydney University","ror":"https://ror.org/03t52dk35","country_code":"AU","type":"education","lineage":["https://openalex.org/I63525965"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Karthick Thiyagarajan","raw_affiliation_strings":["Smart Sensing and Robotics Laboratory (SensR Lab), Centre for Advanced Manufacturing Technology, Western Sydney University, Penrith, Australia"],"raw_orcid":"https://orcid.org/0000-0002-4044-1711","affiliations":[{"raw_affiliation_string":"Smart Sensing and Robotics Laboratory (SensR Lab), Centre for Advanced Manufacturing Technology, Western Sydney University, Penrith, Australia","institution_ids":["https://openalex.org/I63525965"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006355592","display_name":"Mukesh Prasad","orcid":"https://orcid.org/0000-0002-7745-9667"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Mukesh Prasad","raw_affiliation_strings":["School of Computer Science, Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, Australia"],"raw_orcid":"https://orcid.org/0000-0002-7745-9667","affiliations":[{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, Australia","institution_ids":["https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5010907570"],"corresponding_institution_ids":["https://openalex.org/I114017466"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00181684,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":null,"first_page":"63151","last_page":"63161"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9613000154495239,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9613000154495239,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9417999982833862,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9404000043869019,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.7694000005722046},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6823999881744385},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6704999804496765},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.567799985408783},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4787999987602234},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.44110000133514404},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4372999966144562},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4302000105381012}],"concepts":[{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.7694000005722046},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7613000273704529},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6823999881744385},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6704999804496765},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.5752999782562256},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5715000033378601},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.567799985408783},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5512999892234802},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4787999987602234},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.44110000133514404},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4372999966144562},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4302000105381012},{"id":"https://openalex.org/C183365957","wikidata":"https://www.wikidata.org/wiki/Q17140402","display_name":"Remote sensing application","level":3,"score":0.4106000065803528},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.39980000257492065},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.36250001192092896},{"id":"https://openalex.org/C2776429412","wikidata":"https://www.wikidata.org/wiki/Q4688011","display_name":"Aerial image","level":3,"score":0.3589000105857849},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C39399123","wikidata":"https://www.wikidata.org/wiki/Q1348989","display_name":"Earth observation","level":3,"score":0.2712000012397766},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2637999951839447},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2632000148296356}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/access.2026.3684906","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3684906","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2503.07911","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.07911","pdf_url":"https://arxiv.org/pdf/2503.07911","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:doaj.org/article:c32d60f908cb4dd3908ddf4f0cfcd9d7","is_oa":true,"landing_page_url":"https://doaj.org/article/c32d60f908cb4dd3908ddf4f0cfcd9d7","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 14, Pp 63151-63161 (2026)","raw_type":"article"},{"id":"doi:10.48550/arxiv.2503.07911","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.07911","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2026.3684906","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3684906","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Pixel-level":[0],"segmentation":[1],"is":[2,71],"critical":[3],"for":[4,79,107,143],"remote":[5,183],"sensing":[6,184],"applications,":[7],"yet":[8],"traditional":[9],"supervised":[10],"methods":[11],"suffer":[12],"from":[13],"high":[14],"annotation":[15],"costs.":[16],"While":[17],"foundational":[18,179],"vision":[19],"models":[20,180],"like":[21],"CLIP":[22,111],"and":[23,56,122,157],"the":[24,93],"Segment":[25],"Anything":[26],"Model":[27],"(SAM)":[28],"offer":[29],"zero-shot":[30],"capabilities,":[31],"they":[32],"struggle":[33],"with":[34],"domain-specific":[35],"challenges":[36],"in":[37],"aerial":[38],"imagery,":[39],"specifically:":[40],"(1)":[41],"scale":[42],"variation":[43],"causing":[44],"attention":[45,120],"drift,":[46],"(2)":[47],"lack":[48],"of":[49],"semantic":[50],"discrimination":[51],"leading":[52],"to":[53,60,104,126,181],"mask":[54],"redundancy,":[55],"(3)":[57],"poor":[58],"adaptation":[59],"overhead":[61,108],"perspectives.":[62],"To":[63],"bridge":[64],"this":[65],"gap":[66],"without":[67],"task-specific":[68],"fine-tuning,":[69],"VTPSeg":[70,86,159],"presented":[72],"as":[73,139],"a":[74,88,99,116],"coarse-to-fine":[75],"multi-model":[76],"framework":[77],"designed":[78],"high-precision":[80],"off-line":[81],"mapping.":[82],"Unlike":[83],"generic":[84],"integrations,":[85],"introduces":[87],"cohesive":[89],"semantic-geometric":[90],"synergy.":[91],"Specifically,":[92],"Grounding":[94],"DINO+":[95],"(GD+)":[96],"module":[97,113],"employs":[98],"novel":[100],"synonym-based":[101],"prompt":[102,173],"strategy":[103],"maximize":[105],"recall":[106],"objects.":[109],"The":[110],"Filter++":[112],"then":[114],"utilizes":[115],"dual-prompt":[117],"mechanism":[118],"(visual":[119],"circles":[121],"negative":[123],"text":[124],"constraints)":[125],"eliminate":[127],"false":[128],"positives":[129],"caused":[130],"by":[131],"background":[132],"clutter.":[133],"Finally,":[134],"these":[135],"refined":[136],"priors":[137],"serve":[138],"precise":[140],"point":[141],"prompts":[142],"FastSAM,":[144],"ensuring":[145],"instance-level":[146],"granularity.":[147],"Validated":[148],"on":[149],"five":[150,167],"diverse":[151,168],"datasets":[152],"(WHU,":[153],"LoveDA,":[154],"Inria,":[155],"xBD,":[156],"iSAID),":[158],"achieves":[160],"state-of-the-art":[161],"or":[162],"highly":[163],"competitive":[164],"performance":[165],"across":[166],"datasets,":[169],"demonstrating":[170],"that":[171],"strategic":[172],"engineering":[174],"can":[175],"effectively":[176],"adapt":[177],"frozen":[178],"complex":[182],"tasks.":[185]},"counts_by_year":[],"updated_date":"2026-05-05T06:06:40.768181","created_date":"2025-10-10T00:00:00"}
