{"id":"https://openalex.org/W4416749171","doi":"https://doi.org/10.1109/iros60139.2025.11247337","title":"Unveiling the Potential of Segment Anything Model 2 for RGB-Thermal Semantic Segmentation with Language Guidance","display_name":"Unveiling the Potential of Segment Anything Model 2 for RGB-Thermal Semantic Segmentation with Language Guidance","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416749171","doi":"https://doi.org/10.1109/iros60139.2025.11247337"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11247337","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247337","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102608993","display_name":"Jiayi Zhao","orcid":"https://orcid.org/0009-0007-3789-000X"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Jiayi Zhao","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100647693","display_name":"Fei Teng","orcid":"https://orcid.org/0000-0003-4913-7094"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Fei Teng","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102903106","display_name":"Kai Luo","orcid":"https://orcid.org/0000-0003-0229-0227"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Kai Luo","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084836524","display_name":"Guoqiang Zhao","orcid":"https://orcid.org/0000-0002-6632-6412"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Guoqiang Zhao","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100396719","display_name":"Zhiyong Li","orcid":"https://orcid.org/0000-0001-9307-8453"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Zhiyong Li","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics,China","institution_ids":["https://openalex.org/I4210121405"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101651955","display_name":"Zheng Xu","orcid":"https://orcid.org/0009-0003-6747-3953"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Xu Zheng","raw_affiliation_strings":["Hong Kong University of Science and Technology,AI Thrust,Guangzhou,China"],"affiliations":[{"raw_affiliation_string":"Hong Kong University of Science and Technology,AI Thrust,Guangzhou,China","institution_ids":["https://openalex.org/I90610280","https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027010844","display_name":"Kailun Yang","orcid":"https://orcid.org/0000-0002-1090-667X"},"institutions":[{"id":"https://openalex.org/I4210121405","display_name":"Centre for Artificial Intelligence and Robotics","ror":"https://ror.org/01xnbq218","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1340206300","https://openalex.org/I4210121405","https://openalex.org/I4210150591"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Kailun Yang","raw_affiliation_strings":["Hunan University,School of Artificial Intelligence and Robotics,China"],"affiliations":[{"raw_affiliation_string":"Hunan University,School of Artificial Intelligence and Robotics,China","institution_ids":["https://openalex.org/I4210121405"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102608993"],"corresponding_institution_ids":["https://openalex.org/I4210121405"],"apc_list":null,"apc_paid":null,"fwci":2.4362,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.91248312,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2193","last_page":"2200"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5856999754905701,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5856999754905701,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.1680999994277954,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.04809999838471413,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.630299985408783},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5878999829292297},{"id":"https://openalex.org/keywords/semantic-mapping","display_name":"Semantic mapping","score":0.5394999980926514},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4837999939918518},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.47850000858306885},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4456000030040741},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.43810001015663147},{"id":"https://openalex.org/keywords/active-perception","display_name":"Active perception","score":0.41830000281333923},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3621000051498413}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7544999718666077},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.630299985408783},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5928000211715698},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5878999829292297},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.5394999980926514},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4837999939918518},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.47850000858306885},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4456000030040741},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.43810001015663147},{"id":"https://openalex.org/C2776010242","wikidata":"https://www.wikidata.org/wiki/Q4677575","display_name":"Active perception","level":3,"score":0.41830000281333923},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4072999954223633},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3621000051498413},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.35190001130104065},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3449000120162964},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3312999904155731},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3046000003814697},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C198942812","wikidata":"https://www.wikidata.org/wiki/Q496618","display_name":"Semantic property","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.27790001034736633},{"id":"https://openalex.org/C28063669","wikidata":"https://www.wikidata.org/wiki/Q7167042","display_name":"Perceptual system","level":3,"score":0.26989999413490295},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.25360000133514404},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2531000077724457}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11247337","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247337","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320326952","display_name":"State Key Laboratory of Industrial Control Technology","ror":null},{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1895577753","https://openalex.org/W1901129140","https://openalex.org/W1933349210","https://openalex.org/W2412782625","https://openalex.org/W2560023338","https://openalex.org/W2565639579","https://openalex.org/W2774839435","https://openalex.org/W2963881378","https://openalex.org/W2970231061","https://openalex.org/W3035467948","https://openalex.org/W3046194589","https://openalex.org/W3091001089","https://openalex.org/W3110495309","https://openalex.org/W3195411577","https://openalex.org/W3206439726","https://openalex.org/W4296913506","https://openalex.org/W4312402232","https://openalex.org/W4312443924","https://openalex.org/W4313142416","https://openalex.org/W4386065698","https://openalex.org/W4386179772","https://openalex.org/W4386275800","https://openalex.org/W4386737248","https://openalex.org/W4390873110","https://openalex.org/W4390874575","https://openalex.org/W4394863092","https://openalex.org/W4401415581","https://openalex.org/W4401416199","https://openalex.org/W4401567734","https://openalex.org/W4402703011","https://openalex.org/W4402961794","https://openalex.org/W4402980275","https://openalex.org/W4403757491","https://openalex.org/W4405785392","https://openalex.org/W4408634392","https://openalex.org/W4410539708"],"related_works":[],"abstract_inverted_index":{"The":[0,142,172],"perception":[1,25,28,170],"capability":[2],"of":[3,10,59,71,147],"robotic":[4,166],"systems":[5,167],"relies":[6],"on":[7,20,130,135,139],"the":[8,11,57,145,157],"richness":[9],"dataset.":[12],"Although":[13],"Segment":[14],"Anything":[15],"Model":[16],"2":[17],"(SAM2),":[18],"trained":[19],"large":[21,149],"datasets,":[22],"demonstrates":[23],"strong":[24],"potential":[26,58],"in":[27],"tasks,":[29,154],"its":[30],"inherent":[31,92],"training":[32],"paradigm":[33],"prevents":[34],"it":[35],"from":[36],"being":[37],"suitable":[38],"for":[39,64],"RGB-T":[40,152],"tasks.":[41],"To":[42],"address":[43],"these":[44],"challenges,":[45],"we":[46],"propose":[47],"SHIFNet,":[48],"a":[49,106],"novel":[50],"SAM2-driven":[51],"Hybrid":[52],"Interaction":[53],"Paradigm":[54],"that":[55,81,100],"unlocks":[56],"SAM2":[60],"with":[61,113,161,168],"linguistic":[62],"guidance":[63],"efficient":[65],"RGB-Thermal":[66],"perception.":[67],"Our":[68],"framework":[69,143],"consists":[70],"two":[72],"key":[73],"components:":[74],"(1)":[75],"Semantic-Aware":[76],"Cross-modal":[77],"Fusion":[78],"(SACF)":[79],"module":[80,109],"dynamically":[82],"balances":[83],"modality":[84],"contributions":[85],"through":[86,105],"text-guided":[87],"affinity":[88],"learning,":[89],"overcoming":[90],"SAM2\u2019s":[91],"RGB":[93],"bias;":[94],"(2)":[95],"Heterogeneous":[96],"Prompting":[97],"Decoder":[98],"(HPD)":[99],"enhances":[101],"global":[102],"semantic":[103,107,119],"information":[104],"enhancement":[108],"and":[110,137],"then":[111],"combined":[112],"category":[114],"embeddings":[115],"to":[116,151],"amplify":[117],"cross-modal":[118],"consistency.":[120],"With":[121],"32.27M":[122],"trainable":[123],"parameters,":[124],"SHIFNet":[125],"achieves":[126],"state-of-the-art":[127],"segmentation":[128,153],"performance":[129],"public":[131],"benchmarks,":[132],"reaching":[133],"89.8%":[134],"PST900":[136],"67.8%":[138],"FMB,":[140],"respectively.":[141],"facilitates":[144],"adaptation":[146],"pre-trained":[148],"models":[150],"effectively":[155],"mitigating":[156],"high":[158],"costs":[159],"associated":[160],"data":[162],"collection":[163],"while":[164],"endowing":[165],"comprehensive":[169],"capabilities.":[171],"source":[173],"code":[174],"will":[175],"be":[176],"made":[177],"publicly":[178],"available":[179],"at":[180],"https://github.com/iAsakiT3T/SHIFNet.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-28T00:00:00"}
