{"id":"https://openalex.org/W7092622845","doi":"https://doi.org/10.1109/tmm.2025.3623528","title":"CMANet: Context-Aware Mutual Attention Network for Referring Image Segmentation","display_name":"CMANet: Context-Aware Mutual Attention Network for Referring Image Segmentation","publication_year":2025,"publication_date":"2025-10-20","ids":{"openalex":"https://openalex.org/W7092622845","doi":"https://doi.org/10.1109/tmm.2025.3623528"},"language":null,"primary_location":{"id":"doi:10.1109/tmm.2025.3623528","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3623528","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Xiong Pan","orcid":"https://orcid.org/0009-0001-4240-4041"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiong Pan","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi&#x0027;an, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xuemei Xie","orcid":"https://orcid.org/0000-0001-7857-0845"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuemei Xie","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi&#x0027;an, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jianxiu Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I5422934","display_name":"Shanxi Datong University","ror":"https://ror.org/03s8xc553","country_code":"CN","type":"education","lineage":["https://openalex.org/I5422934"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianxiu Yang","raw_affiliation_strings":["Shanxi Datong University, Datong, China"],"affiliations":[{"raw_affiliation_string":"Shanxi Datong University, Datong, China","institution_ids":["https://openalex.org/I5422934"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xiaodan Song","orcid":null},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodan Song","raw_affiliation_strings":["Guangzhou Institute of Technology, Xidian University, Xi&#x0027;an, China"],"affiliations":[{"raw_affiliation_string":"Guangzhou Institute of Technology, Xidian University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":null,"display_name":"Guangming Shi","orcid":"https://orcid.org/0000-0003-2179-3292"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangming Shi","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi&#x0027;an, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi&#x0027;an, China","institution_ids":["https://openalex.org/I149594827"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I149594827"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.55508272,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":null,"first_page":"285","last_page":"296"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.002899999963119626,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0006000000284984708,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.7024000287055969},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6557999849319458},{"id":"https://openalex.org/keywords/mutual-information","display_name":"Mutual information","score":0.652899980545044},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.60589998960495},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.54830002784729},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.5264000296592712},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4821000099182129},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.47920000553131104},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46549999713897705}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8646000027656555},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.7024000287055969},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.694100022315979},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6557999849319458},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.652899980545044},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.60589998960495},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.54830002784729},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.5264000296592712},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4821000099182129},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.47920000553131104},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46549999713897705},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45719999074935913},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4465999901294708},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.42559999227523804},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.38260000944137573},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3675000071525574},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.36649999022483826},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35010001063346863},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.3481999933719635},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.3407000005245209},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.3343000113964081},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.310699999332428},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C2780876879","wikidata":"https://www.wikidata.org/wiki/Q3054749","display_name":"Meaning (existential)","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.28200000524520874},{"id":"https://openalex.org/C2780103172","wikidata":"https://www.wikidata.org/wiki/Q1309721","display_name":"Visual Objects","level":3,"score":0.27889999747276306},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.267300009727478},{"id":"https://openalex.org/C125308379","wikidata":"https://www.wikidata.org/wiki/Q363057","display_name":"Market segmentation","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C85407183","wikidata":"https://www.wikidata.org/wiki/Q1045785","display_name":"Semantic network","level":2,"score":0.2542000114917755}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3623528","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3623528","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2827990972","display_name":null,"funder_award_id":"62293483","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7843016870","display_name":null,"funder_award_id":"62101398","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":65,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2302086703","https://openalex.org/W2302548814","https://openalex.org/W2489434015","https://openalex.org/W2560023338","https://openalex.org/W2605127024","https://openalex.org/W2798556392","https://openalex.org/W2810467724","https://openalex.org/W2896457183","https://openalex.org/W2899341487","https://openalex.org/W2906314281","https://openalex.org/W2962914239","https://openalex.org/W2963109634","https://openalex.org/W2963393688","https://openalex.org/W2963800628","https://openalex.org/W2964284374","https://openalex.org/W2964345792","https://openalex.org/W2973233205","https://openalex.org/W2979739834","https://openalex.org/W2980088508","https://openalex.org/W3034325957","https://openalex.org/W3034692043","https://openalex.org/W3035097537","https://openalex.org/W3108748824","https://openalex.org/W3111947517","https://openalex.org/W3130523820","https://openalex.org/W3134837903","https://openalex.org/W3138516171","https://openalex.org/W3156800342","https://openalex.org/W3159637683","https://openalex.org/W3169998662","https://openalex.org/W3172522282","https://openalex.org/W3172752666","https://openalex.org/W3178075329","https://openalex.org/W3187664142","https://openalex.org/W3199130398","https://openalex.org/W3201770677","https://openalex.org/W3214740560","https://openalex.org/W3216551675","https://openalex.org/W4200174151","https://openalex.org/W4200631575","https://openalex.org/W4307504011","https://openalex.org/W4309181071","https://openalex.org/W4312414965","https://openalex.org/W4312543911","https://openalex.org/W4377711491","https://openalex.org/W4382240193","https://openalex.org/W4383112592","https://openalex.org/W4385764483","https://openalex.org/W4386066126","https://openalex.org/W4386075493","https://openalex.org/W4386076034","https://openalex.org/W4386076142","https://openalex.org/W4386076636","https://openalex.org/W4386108423","https://openalex.org/W4387969771","https://openalex.org/W4390873429","https://openalex.org/W4390873528","https://openalex.org/W4392397297","https://openalex.org/W4392449449","https://openalex.org/W4392693659","https://openalex.org/W4393156126","https://openalex.org/W4400611185","https://openalex.org/W4402703023","https://openalex.org/W4402727260"],"related_works":[],"abstract_inverted_index":{"Referring":[0],"Image":[1],"Segmentation":[2],"(RIS)":[3],"aims":[4],"to":[5,41,74,97,127],"generate":[6],"specified":[7],"target":[8,50,125],"masks":[9],"in":[10,22,78],"the":[11,24,38,44,85,99,110,129,144],"image":[12,146,157],"using":[13],"natural":[14,145],"language.":[15],"While":[16],"existing":[17],"methods":[18,165],"have":[19],"made":[20],"progress":[21],"modeling":[23],"relationship":[25],"between":[26,101],"words":[27],"and":[28,52,70,103,153],"pixels,":[29],"they":[30],"often":[31],"overlook":[32],"sentence-level":[33,71,124],"semantic":[34,72],"information.":[35],"This":[36],"limits":[37],"model's":[39],"ability":[40],"fully":[42],"comprehend":[43],"deeper":[45],"meaning":[46],"of":[47,132],"language,":[48],"affecting":[49],"localization":[51],"segmentation.":[53],"To":[54],"address":[55],"this":[56],"problem,":[57],"we":[58,89,114],"propose":[59],"a":[60,91,116],"Context-aware":[61,117],"Mutual":[62,93,118],"Attention":[63,94,119],"Network":[64],"(CMANet),":[65],"which":[66],"integrates":[67],"both":[68],"word-level":[69],"information":[73],"guide":[75,128],"visual":[76,102],"features":[77],"generating":[79],"precise":[80],"object":[81],"masks.":[82],"Specifically,":[83],"during":[84],"feature":[86],"encoding":[87],"stage,":[88,113],"design":[90],"Shallow":[92],"(SMA)":[95],"module":[96,121],"reduce":[98],"discrepancy":[100],"linguistic":[104],"representations,":[105],"enhancing":[106],"pixel-word":[107],"alignment.":[108],"In":[109],"global":[111],"representation":[112,131],"introduce":[115],"(CMA)":[120],"that":[122,160],"utilizes":[123],"semantics":[126],"contextual":[130],"multi-modal":[133],"features.":[134],"Experiments":[135],"conducted":[136],"on":[137,166],"several":[138],"commonly":[139],"used":[140],"RIS":[141,151],"datasets,":[142,169],"including":[143],"referring":[147,154],"segmentation":[148,172],"dataset,":[149,152,158],"Robust":[150],"remote":[155],"sensing":[156],"show":[159],"CMANet":[161],"outperforms":[162],"current":[163],"state-of-the-art":[164],"all":[167],"these":[168],"demonstrating":[170],"superior":[171],"accuracy.":[173]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-21T00:00:00"}
