{"id":"https://openalex.org/W7104748830","doi":"https://doi.org/10.1109/lsp.2025.3631433","title":"IEMFormer: Internal and External Multi-Fusion Transformer for Indoor RGB-D Semantic Segmentation","display_name":"IEMFormer: Internal and External Multi-Fusion Transformer for Indoor RGB-D Semantic Segmentation","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W7104748830","doi":"https://doi.org/10.1109/lsp.2025.3631433"},"language":null,"primary_location":{"id":"doi:10.1109/lsp.2025.3631433","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2025.3631433","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Kaidi Hu","orcid":"https://orcid.org/0009-0004-9278-3176"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kaidi Hu","raw_affiliation_strings":["Global Institute of Future Technology, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0004-9278-3176","affiliations":[{"raw_affiliation_string":"Global Institute of Future Technology, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wei Li","orcid":"https://orcid.org/0000-0002-0059-3745"},"institutions":[{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]},{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Li","raw_affiliation_strings":["School of Intelligence Science and Technology, Nanjing University, Suzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-0059-3745","affiliations":[{"raw_affiliation_string":"School of Intelligence Science and Technology, Nanjing University, Suzhou, China","institution_ids":["https://openalex.org/I308837","https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Guangwei Gao","orcid":"https://orcid.org/0000-0002-3950-1844"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangwei Gao","raw_affiliation_strings":["PCA Lab, the Key Lab of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-3950-1844","affiliations":[{"raw_affiliation_string":"PCA Lab, the Key Lab of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"last","author":{"id":null,"display_name":"Ruigang Yang","orcid":"https://orcid.org/0000-0001-5296-6307"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruigang Yang","raw_affiliation_strings":["Global Institute of Future Technology, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-5296-6307","affiliations":[{"raw_affiliation_string":"Global Institute of Future Technology, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.55611631,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"32","issue":null,"first_page":"4424","last_page":"4428"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8763999938964844,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8763999938964844,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11164","display_name":"Remote Sensing and LiDAR Applications","score":0.01119999960064888,"subfield":{"id":"https://openalex.org/subfields/2305","display_name":"Environmental Engineering"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.005499999970197678,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5982000231742859},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5828999876976013},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.44679999351501465},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.4449000060558319},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.4212999939918518},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.3950999975204468},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.39089998602867126},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.38999998569488525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8102999925613403},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6643000245094299},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5982000231742859},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5828999876976013},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.49079999327659607},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.44679999351501465},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.4449000060558319},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.4212999939918518},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3950999975204468},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.39089998602867126},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.38999998569488525},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.38100001215934753},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.3736000061035156},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.31940001249313354},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.310699999332428},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.30059999227523804},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.2944999933242798},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.289900004863739},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.26030001044273376}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lsp.2025.3631433","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lsp.2025.3631433","pdf_url":null,"source":{"id":"https://openalex.org/S120629676","display_name":"IEEE Signal Processing Letters","issn_l":"1070-9908","issn":["1070-9908","1558-2361"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W125693051","https://openalex.org/W1923184257","https://openalex.org/W2108598243","https://openalex.org/W2527938858","https://openalex.org/W2747078409","https://openalex.org/W2971014764","https://openalex.org/W3170885821","https://openalex.org/W4200635035","https://openalex.org/W4205955091","https://openalex.org/W4220849367","https://openalex.org/W4225487034","https://openalex.org/W4298326888","https://openalex.org/W4312431763","https://openalex.org/W4313142416","https://openalex.org/W4376464632","https://openalex.org/W4383812946","https://openalex.org/W4384519183","https://openalex.org/W4385938250","https://openalex.org/W4386065698","https://openalex.org/W4386065847","https://openalex.org/W4386078180","https://openalex.org/W4386179772","https://openalex.org/W4387967987","https://openalex.org/W4392901516","https://openalex.org/W4392931569","https://openalex.org/W4399913139","https://openalex.org/W4400321270","https://openalex.org/W4401527938","https://openalex.org/W4402916775","https://openalex.org/W4404809244","https://openalex.org/W4407025383"],"related_works":[],"abstract_inverted_index":{"Effectively":[0],"fusing":[1],"and":[2,5,30,88,100,148,167],"complementing":[3],"RGB":[4],"depth":[6],"modalities":[7,66],"while":[8],"mitigating":[9],"image":[10],"noise":[11,152],"is":[12,137],"a":[13,27,53,68],"critical":[14],"challenge":[15],"in":[16,123,178],"the":[17,57,74,92,110,116,124,127,146,165],"RGB-D":[18,169],"semantic":[19,142],"segmentation":[20],"task.":[21],"In":[22],"this":[23,37],"paper,":[24],"we":[25,51],"propose":[26],"novel":[28],"Internal":[29],"External":[31,76],"Multi-fusion":[32],"Transformer":[33,59],"(IEMFormer)":[34],"to":[35,44,144,155,158],"address":[36],"issue.":[38],"IEMFormer":[39,173],"incorporates":[40],"stage-specific":[41],"fusion":[42,54],"strategies":[43],"enhance":[45],"modal":[46],"complementarity.":[47],"For":[48,71],"internal":[49],"fusion,":[50,73],"integrate":[52],"unit":[55],"within":[56],"traditional":[58],"block,":[60],"combining":[61],"matching":[62],"tokens":[63],"from":[64,105,130],"both":[65,86,106],"on":[67,164],"pixel-by-pixel":[69],"basis.":[70],"external":[72],"proposed":[75],"Adaptive":[77],"Cross-modal":[78],"Fusion":[79,120],"(EACF)":[80],"module":[81,122],"filters":[82],"dual-modal":[83],"features":[84,129,143],"across":[85],"spatial":[87,103],"channel":[89,98],"dimensions,":[90],"serving":[91],"purpose":[93],"of":[94,112],"adaptively":[95],"weighting":[96],"complementary":[97],"information":[99],"robustly":[101],"aggregating":[102],"patterns":[104],"modalities,":[107],"thereby":[108],"facilitating":[109],"integration":[111],"multimodal":[113],"information.":[114],"Additionally,":[115],"Global":[117],"Self-attention":[118],"Guided":[119],"(GSGF)":[121],"decoder":[125],"refines":[126],"fused":[128],"earlier":[131],"stages,":[132],"effectively":[133],"suppressing":[134],"noise.":[135],"This":[136],"achieved":[138],"by":[139],"leveraging":[140],"high-level":[141],"guide":[145],"refinement":[147],"incorporating":[149],"an":[150],"active":[151],"suppression":[153],"mechanism":[154],"prevent":[156],"overfitting":[157],"dominant,":[159],"noisy":[160],"features.":[161],"Extensive":[162],"experiments":[163],"NYUv2":[166],"SUN":[168],"datasets":[170],"demonstrate":[171],"that":[172],"achieves":[174],"highly":[175],"competitive":[176],"performance":[177],"accurately":[179],"understanding":[180],"indoor":[181],"scenes.":[182]},"counts_by_year":[],"updated_date":"2025-11-25T14:43:58.451035","created_date":"2025-11-11T00:00:00"}
