{"id":"https://openalex.org/W4416749408","doi":"https://doi.org/10.1109/iros60139.2025.11247008","title":"Cross-modal State Space Modeling for Real-time RGB-thermal Wild Scene Semantic Segmentation","display_name":"Cross-modal State Space Modeling for Real-time RGB-thermal Wild Scene Semantic Segmentation","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416749408","doi":"https://doi.org/10.1109/iros60139.2025.11247008"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11247008","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247008","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101799485","display_name":"Xiaodong Guo","orcid":"https://orcid.org/0000-0003-3713-1269"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaodong Guo","raw_affiliation_strings":["Beijing Institute of Technology,School of Automation,Beijing,China,100081"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology,School of Automation,Beijing,China,100081","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zi\u2019ang Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zi\u2019ang Lin","raw_affiliation_strings":["Beijing Institute of Technology,School of Automation,Beijing,China,100081"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology,School of Automation,Beijing,China,100081","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089960678","display_name":"Lingling Hu","orcid":"https://orcid.org/0000-0003-0323-8543"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Luwen Hu","raw_affiliation_strings":["Beijing Institute of Technology,School of Automation,Beijing,China,100081"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology,School of Automation,Beijing,China,100081","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109136433","display_name":"Zhihong Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhihong Deng","raw_affiliation_strings":["Beijing Institute of Technology,School of Automation,Beijing,China,100081"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology,School of Automation,Beijing,China,100081","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100392662","display_name":"Tong Liu","orcid":"https://orcid.org/0000-0002-6456-4767"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tong Liu","raw_affiliation_strings":["Beijing Institute of Technology,School of Automation,Beijing,China,100081"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology,School of Automation,Beijing,China,100081","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076872319","display_name":"Wujie Zhou","orcid":"https://orcid.org/0000-0002-3055-2493"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wujie Zhou","raw_affiliation_strings":["Zhejiang University of Science and Technology,School of Information and Electronic Engineering,Hangzhou,China,310023"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University of Science and Technology,School of Information and Electronic Engineering,Hangzhou,China,310023","institution_ids":["https://openalex.org/I168879160"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101799485"],"corresponding_institution_ids":["https://openalex.org/I125839683"],"apc_list":null,"apc_paid":null,"fwci":1.9419,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.8891152,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5938","last_page":"5945"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6359999775886536,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6359999775886536,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.04769999906420708,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.03290000185370445,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6894000172615051},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.531499981880188},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.5012999773025513},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.48429998755455017},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.44269999861717224},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4406000077724457},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4260999858379364},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4133000075817108},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.37929999828338623}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7235999703407288},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6894000172615051},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6391000151634216},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.531499981880188},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.5012999773025513},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48429998755455017},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.44269999861717224},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4406000077724457},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4260999858379364},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4133000075817108},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.40369999408721924},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.37929999828338623},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.37139999866485596},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.3702999949455261},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3695000112056732},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3458000123500824},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.31220000982284546},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.28529998660087585},{"id":"https://openalex.org/C142853389","wikidata":"https://www.wikidata.org/wiki/Q744778","display_name":"Association (psychology)","level":2,"score":0.28450000286102295},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.2800999879837036},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.26669999957084656},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.25839999318122864}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11247008","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11247008","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2774839435","https://openalex.org/W3091001089","https://openalex.org/W3198062544","https://openalex.org/W3211490618","https://openalex.org/W4285244413","https://openalex.org/W4366147345","https://openalex.org/W4367663172","https://openalex.org/W4376464632","https://openalex.org/W4381300351","https://openalex.org/W4385245566","https://openalex.org/W4385834506","https://openalex.org/W4386065698","https://openalex.org/W4386179772","https://openalex.org/W4386275800","https://openalex.org/W4388191449","https://openalex.org/W4390489106","https://openalex.org/W4390874070","https://openalex.org/W4392343478","https://openalex.org/W4394782240","https://openalex.org/W4401415581","https://openalex.org/W4402637315","https://openalex.org/W4403722461","https://openalex.org/W4404545717","https://openalex.org/W4404809244","https://openalex.org/W4405953164","https://openalex.org/W4409262789","https://openalex.org/W4409310739","https://openalex.org/W4415796023","https://openalex.org/W4415797492"],"related_works":[],"abstract_inverted_index":{"The":[0],"integration":[1],"of":[2,89],"RGB":[3,75],"and":[4,76,84,150],"thermal":[5,77],"data":[6,21],"can":[7],"significantly":[8],"improve":[9],"semantic":[10,46],"segmentation":[11,47],"performance":[12,142],"in":[13],"wild":[14],"environments":[15],"for":[16,32],"field":[17],"robots.":[18],"Nevertheless,":[19],"multi-source":[20],"processing":[22],"(e.g.":[23],"Transformer-based":[24,123],"approaches)":[25],"imposes":[26],"significant":[27],"computational":[28,128,152],"overhead,":[29],"presenting":[30],"challenges":[31],"resource-constrained":[33],"systems.":[34],"To":[35],"resolve":[36],"this":[37],"critical":[38],"limitation,":[39],"we":[40,64,96],"introduced":[41,65],"CM-SSM,":[42],"an":[43],"efficient":[44],"RGB-thermal":[45],"architecture":[48],"leveraging":[49],"a":[50,66,98],"cross-modal":[51,67,81,99],"state":[52,87,100],"space":[53,101],"modeling":[54],"(SSM)":[55],"approach.":[56],"Our":[57],"framework":[58],"comprises":[59],"two":[60],"key":[61],"components.":[62],"First,":[63],"2D-selective-scan":[68],"(CM-SS2D)":[69],"module":[70,104],"to":[71,132],"establish":[72],"SSM":[73],"between":[74],"modalities,":[78],"which":[79],"constructs":[80],"visual":[82],"sequences":[83],"derives":[85],"hidden":[86],"representations":[88],"one":[90],"modality":[91],"from":[92,110],"the":[93,144,157],"other.":[94],"Second,":[95],"developed":[97],"association":[102],"(CM-SSA)":[103],"that":[105,138],"effectively":[106],"integrates":[107],"global":[108],"associations":[109],"CM-SS2D":[111],"with":[112,122,130,147],"local":[113],"spatial":[114],"features":[115],"extracted":[116],"through":[117],"convolutional":[118],"operations.":[119],"In":[120],"contrast":[121],"approaches,":[124],"CM-SSM":[125,139],"achieves":[126,140],"linear":[127],"complexity":[129],"respect":[131],"image":[133],"resolution.":[134],"Experimental":[135],"results":[136],"show":[137],"state-of-the-art":[141],"on":[143,156],"CART":[145],"dataset":[146,159],"fewer":[148],"parameters":[149],"lower":[151],"cost.":[153],"Further":[154],"experiments":[155],"PST900":[158],"demonstrate":[160],"its":[161],"generalizability.":[162],"Codes":[163],"are":[164],"available":[165],"at":[166],"https://github.com/xiaodonguo/CMSSM.":[167]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-06-02T09:04:35.204637","created_date":"2025-11-28T00:00:00"}
