{"id":"https://openalex.org/W4320060449","doi":"https://doi.org/10.1145/3536221.3556570","title":"Structured Multimodal Fusion Network for Referring Image Segmentation","display_name":"Structured Multimodal Fusion Network for Referring Image Segmentation","publication_year":2022,"publication_date":"2022-11-04","ids":{"openalex":"https://openalex.org/W4320060449","doi":"https://doi.org/10.1145/3536221.3556570"},"language":"en","primary_location":{"id":"doi:10.1145/3536221.3556570","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3536221.3556570","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 International Conference on Multimodal Interaction","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004832199","display_name":"Mingcheng Xue","orcid":"https://orcid.org/0000-0002-6614-8871"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingcheng Xue","raw_affiliation_strings":["Dalian University of Technology, SCHOOL OF SOFTWARE TECHNOLOGY, DALIAN UNIVERSITY OF TECHNOLOGY, China"],"raw_orcid":"https://orcid.org/0000-0002-6614-8871","affiliations":[{"raw_affiliation_string":"Dalian University of Technology, SCHOOL OF SOFTWARE TECHNOLOGY, DALIAN UNIVERSITY OF TECHNOLOGY, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004895426","display_name":"Yu Liu","orcid":"https://orcid.org/0000-0002-2067-9175"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Liu","raw_affiliation_strings":["Dalian University of Technology, SCHOOL OF SOFTWARE TECHNOLOGY, DALIAN UNIVERSITY OF TECHNOLOGY, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, SCHOOL OF SOFTWARE TECHNOLOGY, DALIAN UNIVERSITY OF TECHNOLOGY, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060622132","display_name":"Kaiping Xu","orcid":"https://orcid.org/0000-0001-7388-7878"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiping Xu","raw_affiliation_strings":["Dalian University of Technology, SCHOOL OF SOFTWARE TECHNOLOGY, DALIAN UNIVERSITY OF TECHNOLOGY, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, SCHOOL OF SOFTWARE TECHNOLOGY, DALIAN UNIVERSITY OF TECHNOLOGY, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100392409","display_name":"Haiyang Zhang","orcid":"https://orcid.org/0000-0002-3025-9609"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haiyang Zhang","raw_affiliation_strings":["Dalian University of Technology, SCHOOL OF SOFTWARE TECHNOLOGY, DALIAN UNIVERSITY OF TECHNOLOGY, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, SCHOOL OF SOFTWARE TECHNOLOGY, DALIAN UNIVERSITY OF TECHNOLOGY, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100760566","display_name":"Chengyang Yu","orcid":"https://orcid.org/0009-0003-5217-1100"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengyang Yu","raw_affiliation_strings":["Dalian University of Technology, SCHOOL OF SOFTWARE TECHNOLOGY, DALIAN UNIVERSITY OF TECHNOLOGY, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, SCHOOL OF SOFTWARE TECHNOLOGY, DALIAN UNIVERSITY OF TECHNOLOGY, China","institution_ids":["https://openalex.org/I27357992"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5004832199"],"corresponding_institution_ids":["https://openalex.org/I27357992"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14991414,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"36","last_page":"47"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8085664510726929},{"id":"https://openalex.org/keywords/referent","display_name":"Referent","score":0.7144706845283508},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6940823793411255},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.645179271697998},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5819563269615173},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5156139731407166},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.4499359130859375},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.44099369645118713},{"id":"https://openalex.org/keywords/image-fusion","display_name":"Image fusion","score":0.42990052700042725},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40372779965400696},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3481346368789673},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.33136609196662903}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8085664510726929},{"id":"https://openalex.org/C2777096784","wikidata":"https://www.wikidata.org/wiki/Q3826351","display_name":"Referent","level":2,"score":0.7144706845283508},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6940823793411255},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.645179271697998},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5819563269615173},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5156139731407166},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4499359130859375},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.44099369645118713},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.42990052700042725},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40372779965400696},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3481346368789673},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.33136609196662903},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3536221.3556570","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3536221.3556570","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 International Conference on Multimodal Interaction","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5899999737739563}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1903029394","https://openalex.org/W2031489346","https://openalex.org/W2117539524","https://openalex.org/W2131774270","https://openalex.org/W2157331557","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2250861254","https://openalex.org/W2251512949","https://openalex.org/W2302548814","https://openalex.org/W2489434015","https://openalex.org/W2560023338","https://openalex.org/W2583360688","https://openalex.org/W2605127024","https://openalex.org/W2798556392","https://openalex.org/W2876852810","https://openalex.org/W2894964039","https://openalex.org/W2963109634","https://openalex.org/W2963244312","https://openalex.org/W2963717374","https://openalex.org/W2963735856","https://openalex.org/W2963881378","https://openalex.org/W2964345792","https://openalex.org/W2980088508","https://openalex.org/W2984121207","https://openalex.org/W2986803748","https://openalex.org/W3004019157","https://openalex.org/W3034325957","https://openalex.org/W3034692043","https://openalex.org/W3034772468","https://openalex.org/W3093025045","https://openalex.org/W3093122931","https://openalex.org/W3096609285","https://openalex.org/W3108748824","https://openalex.org/W3138516171","https://openalex.org/W3156800342","https://openalex.org/W3167814535","https://openalex.org/W3171927989","https://openalex.org/W3201770677","https://openalex.org/W3206209177","https://openalex.org/W4214490042"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W2143938773","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W4312814274","https://openalex.org/W1590307681","https://openalex.org/W2536018345","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W1522196789"],"abstract_inverted_index":{"Referring":[0],"image":[1],"segmentation":[2,45],"aims":[3],"to":[4,26,33,42,63,96,119,124],"segment":[5],"one":[6],"particular":[7],"object":[8],"referred":[9],"by":[10],"a":[11,70,79,82,86,115],"natural":[12],"language":[13,32],"expression":[14],"in":[15],"the":[16,35,44,48,59,105,132],"image.":[17],"One":[18],"major":[19,38],"challenge":[20,39],"of":[21,47,78],"this":[22,51],"task":[23],"is":[24,40],"how":[25,41],"understand":[27],"and":[28,31,57,85,100,111,136],"align":[29],"vision":[30],"distinguish":[34],"referent.":[36,49],"Another":[37],"refine":[43],"mask":[46,87,116],"In":[50],"paper,":[52],"we":[53,68],"focus":[54],"on":[55,131],"dissecting":[56],"enhancing":[58],"interaction":[60],"between":[61],"modalities":[62],"address":[64],"these":[65],"challenges.":[66],"Specifically,":[67],"propose":[69],"Structured":[71],"Multimodal":[72],"Fusion":[73],"Network":[74],"(SMFN),":[75],"which":[76],"consists":[77],"multimodal":[80,93],"tree,":[81],"cross-modal":[83],"transformer,":[84],"refinement":[88,117],"module.":[89],"SMFN":[90],"first":[91],"exploits":[92],"fusion":[94],"structures":[95],"deeply":[97],"integrate":[98],"visual":[99,122],"linguistic":[101],"features":[102,123],"so":[103],"that":[104],"referent":[106],"can":[107],"be":[108],"accurately":[109],"distinguished":[110],"then":[112],"further":[113],"utilizes":[114],"module":[118],"aggregate":[120],"multi-scale":[121],"clarify":[125],"boundaries.":[126],"We":[127],"conduct":[128],"extensive":[129],"experiments":[130],"four":[133],"benchmark":[134],"datasets":[135],"achieve":[137],"new":[138],"state-of-the-art":[139],"performances":[140],"under":[141],"different":[142],"evaluation":[143],"metrics.":[144]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
