{"id":"https://openalex.org/W4379806341","doi":"https://doi.org/10.1145/3591106.3592235","title":"Intra-inter Modal Attention Blocks for RGB-D Semantic Segmentation","display_name":"Intra-inter Modal Attention Blocks for RGB-D Semantic Segmentation","publication_year":2023,"publication_date":"2023-06-08","ids":{"openalex":"https://openalex.org/W4379806341","doi":"https://doi.org/10.1145/3591106.3592235"},"language":"en","primary_location":{"id":"doi:10.1145/3591106.3592235","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3591106.3592235","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 ACM International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014224476","display_name":"Soyun Choi","orcid":"https://orcid.org/0000-0002-2701-4782"},"institutions":[{"id":"https://openalex.org/I191879574","display_name":"Inha University","ror":"https://ror.org/01easw929","country_code":"KR","type":"education","lineage":["https://openalex.org/I191879574"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Soyun Choi","raw_affiliation_strings":["Inha University, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-2701-4782","affiliations":[{"raw_affiliation_string":"Inha University, Republic of Korea","institution_ids":["https://openalex.org/I191879574"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007958924","display_name":"Youjia Zhang","orcid":"https://orcid.org/0000-0002-5671-3232"},"institutions":[{"id":"https://openalex.org/I191879574","display_name":"Inha University","ror":"https://ror.org/01easw929","country_code":"KR","type":"education","lineage":["https://openalex.org/I191879574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Youjia Zhang","raw_affiliation_strings":["Inha University, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-5671-3232","affiliations":[{"raw_affiliation_string":"Inha University, Republic of Korea","institution_ids":["https://openalex.org/I191879574"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044809763","display_name":"Sungeun Hong","orcid":"https://orcid.org/0000-0003-1774-9168"},"institutions":[{"id":"https://openalex.org/I191879574","display_name":"Inha University","ror":"https://ror.org/01easw929","country_code":"KR","type":"education","lineage":["https://openalex.org/I191879574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sungeun Hong","raw_affiliation_strings":["Inha University, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0003-1774-9168","affiliations":[{"raw_affiliation_string":"Inha University, Republic of Korea","institution_ids":["https://openalex.org/I191879574"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5014224476"],"corresponding_institution_ids":["https://openalex.org/I191879574"],"apc_list":null,"apc_paid":null,"fwci":0.3532,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.57924617,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"217","last_page":"225"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.8240257501602173},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7344169020652771},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6965658068656921},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.6288573741912842},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6046989560127258},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5712476968765259},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5398525595664978},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3719935417175293},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.32701146602630615},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.10819399356842041},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.057857513427734375},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.05231693387031555}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.8240257501602173},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7344169020652771},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6965658068656921},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.6288573741912842},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6046989560127258},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5712476968765259},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5398525595664978},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3719935417175293},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32701146602630615},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.10819399356842041},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.057857513427734375},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.05231693387031555},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3591106.3592235","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3591106.3592235","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 ACM International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W125693051","https://openalex.org/W1745334888","https://openalex.org/W1903029394","https://openalex.org/W1905829557","https://openalex.org/W1923184257","https://openalex.org/W2033979122","https://openalex.org/W2067912884","https://openalex.org/W2112546694","https://openalex.org/W2116040950","https://openalex.org/W2119300483","https://openalex.org/W2154996879","https://openalex.org/W2194775991","https://openalex.org/W2412782625","https://openalex.org/W2485522583","https://openalex.org/W2549139847","https://openalex.org/W2563705555","https://openalex.org/W2604455318","https://openalex.org/W2752782242","https://openalex.org/W2776622059","https://openalex.org/W2777686015","https://openalex.org/W2798791840","https://openalex.org/W2959581809","https://openalex.org/W2960933010","https://openalex.org/W2962802951","https://openalex.org/W2963091558","https://openalex.org/W2963321359","https://openalex.org/W2963881378","https://openalex.org/W2963896186","https://openalex.org/W2964309882","https://openalex.org/W2965391153","https://openalex.org/W2971014764","https://openalex.org/W2981899103","https://openalex.org/W2982220924","https://openalex.org/W2985459778","https://openalex.org/W3031350880","https://openalex.org/W3034355852","https://openalex.org/W3035276179","https://openalex.org/W3108601100","https://openalex.org/W3137799923","https://openalex.org/W3163662587","https://openalex.org/W3169283839","https://openalex.org/W3170544306","https://openalex.org/W3171949150","https://openalex.org/W3194841206","https://openalex.org/W3196107314","https://openalex.org/W3207537403","https://openalex.org/W3211490618","https://openalex.org/W4285102506","https://openalex.org/W4285158377","https://openalex.org/W4313181443"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W972276598","https://openalex.org/W2087343574","https://openalex.org/W4246352526","https://openalex.org/W2121910908"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"introduce":[4],"a":[5,57],"novel":[6],"approach":[7],"to":[8,39],"address":[9],"the":[10,37,78,86,92,112,122],"challenge":[11],"of":[12,36,56,124],"effectively":[13],"utilizing":[14],"both":[15,31,72],"RGB":[16],"and":[17,33,61,74,127],"depth":[18],"information":[19,38],"for":[20],"semantic":[21],"segmentation.":[22,133],"Our":[23,115],"approach,":[24],"Intra-inter":[25],"Modal":[26],"Attention":[27],"(IMA)":[28],"blocks,":[29],"considers":[30],"intra-modal":[32,73],"inter-modal":[34,50,75],"aspects":[35],"produce":[40],"better":[41],"results":[42,96,120],"than":[43],"prior":[44],"methods":[45],"which":[46],"primarily":[47],"focused":[48],"on":[49,97,121,131],"relationships.":[51],"The":[52,67],"IMA":[53,113],"blocks":[54],"consist":[55],"cross-modal":[58,68],"non-local":[59,69],"module":[60,70,90],"an":[62],"adaptive":[63,87],"channel-wise":[64,88],"fusion":[65,89],"module.":[66],"captures":[71],"variations":[76],"at":[77],"spatial":[79],"level":[80],"through":[81],"inter-modality":[82],"parameter":[83],"sharing,":[84],"while":[85],"refines":[91],"spatially-correlated":[93],"features.":[94],"Experimental":[95],"RGB-D":[98,132],"benchmark":[99],"datasets":[100],"demonstrate":[101],"consistent":[102],"performance":[103],"improvements":[104],"over":[105],"various":[106],"baseline":[107],"segmentation":[108],"networks":[109],"when":[110],"using":[111],"blocks.":[114],"in-depth":[116],"analysis":[117],"provides":[118],"comprehensive":[119],"impact":[123],"intra-,":[125],"inter-,":[126],"intra-inter":[128],"modal":[129],"attention":[130]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
