{"id":"https://openalex.org/W4416250792","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228844","title":"Multi-modal Feature Enhancement and Prototype Optimization for Few-Shot Segmentation","display_name":"Multi-modal Feature Enhancement and Prototype Optimization for Few-Shot Segmentation","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416250792","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228844"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228844","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101420108","display_name":"Ruizhe Zhang","orcid":"https://orcid.org/0000-0003-0676-0900"},"institutions":[{"id":"https://openalex.org/I22046295","display_name":"Inner Mongolia Normal University","ror":"https://ror.org/0497ase59","country_code":"CN","type":"education","lineage":["https://openalex.org/I22046295"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruizhe Zhang","raw_affiliation_strings":["Inner Mongolia Normal University,College of Computer Science and Technology,Hohhot,China"],"affiliations":[{"raw_affiliation_string":"Inner Mongolia Normal University,College of Computer Science and Technology,Hohhot,China","institution_ids":["https://openalex.org/I22046295"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112301731","display_name":"Yanjun Yin","orcid":null},"institutions":[{"id":"https://openalex.org/I22046295","display_name":"Inner Mongolia Normal University","ror":"https://ror.org/0497ase59","country_code":"CN","type":"education","lineage":["https://openalex.org/I22046295"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanjun Yin","raw_affiliation_strings":["Inner Mongolia Normal University,College of Computer Science and Technology,Hohhot,China"],"affiliations":[{"raw_affiliation_string":"Inner Mongolia Normal University,College of Computer Science and Technology,Hohhot,China","institution_ids":["https://openalex.org/I22046295"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101420108"],"corresponding_institution_ids":["https://openalex.org/I22046295"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37361693,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5418999791145325,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5418999791145325,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.24570000171661377,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.08789999783039093,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.7149999737739563},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.7027999758720398},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6417999863624573},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6410999894142151},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5335000157356262},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.4447999894618988},{"id":"https://openalex.org/keywords/semantic-feature","display_name":"Semantic feature","score":0.4399000108242035}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7968000173568726},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.7149999737739563},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.7027999758720398},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6417999863624573},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6410999894142151},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6236000061035156},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5335000157356262},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.4447999894618988},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.4399000108242035},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4359000027179718},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.4156000018119812},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40459999442100525},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3758000135421753},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.37279999256134033},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.36090001463890076},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3131999969482422},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228844","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2810392541","https://openalex.org/W2963078159","https://openalex.org/W2981787211","https://openalex.org/W2983850069","https://openalex.org/W2990230185","https://openalex.org/W3011743383","https://openalex.org/W3034942609","https://openalex.org/W3047258141","https://openalex.org/W3106906018","https://openalex.org/W3108189450","https://openalex.org/W3167453437","https://openalex.org/W3169024950","https://openalex.org/W3176065502","https://openalex.org/W3203637842","https://openalex.org/W3204077273","https://openalex.org/W4214573368","https://openalex.org/W4214660208","https://openalex.org/W4285604202","https://openalex.org/W4312420092","https://openalex.org/W4312960937","https://openalex.org/W4313196086","https://openalex.org/W4362683490","https://openalex.org/W4386833253","https://openalex.org/W4390872570"],"related_works":[],"abstract_inverted_index":{"In":[0],"few-shot":[1],"semantic":[2,77,130],"segmentation,":[3],"the":[4,53,60,68,139,144,148],"CLIP":[5],"model,":[6],"with":[7,39],"its":[8],"cross-modal":[9,88],"learning":[10],"capabilities,":[11],"shows":[12],"potential":[13],"in":[14,114],"combining":[15],"visual":[16],"and":[17,58,117,123,172],"textual":[18,97],"information":[19,57,78],"to":[20,37,102,142],"enhance":[21],"performance.":[22],"Most":[23],"models":[24],"follow":[25],"a":[26,87,128],"unimodal":[27],"prototype":[28,80,91,121,154],"generation":[29,92],"method,":[30,93],"extracting":[31],"representative":[32],"features":[33,98],"from":[34],"support":[35,49],"images":[36],"compare":[38],"query":[40,104],"image":[41,50],"features.":[42,64,105],"However,":[43],"this":[44],"approach":[45],"relies":[46],"only":[47],"on":[48,147,165],"features,":[51],"limiting":[52],"use":[54],"of":[55,62,70],"multi-modal":[56,129],"reducing":[59],"expressiveness":[61,122],"category":[63,153],"It":[65],"also":[66],"overlooks":[67],"impact":[69],"MAP":[71],"operations,":[72],"which":[73,134],"may":[74],"introduce":[75],"irrelevant":[76],"during":[79],"generation.":[81],"To":[82],"address":[83],"this,":[84],"we":[85,126],"propose":[86,127],"feature":[89],"enhancement":[90,131],"incorporating":[94],"CLIP-generated":[95],"class-specific":[96],"into":[99,138],"model":[100],"training":[101],"strengthen":[103],"This":[106],"method":[107,161],"activates":[108],"shared":[109],"semantics":[110],"between":[111],"support-query":[112],"pairs":[113],"both":[115],"spatial":[116],"channel":[118],"dimensions,":[119],"improving":[120],"discrimination.":[124],"Additionally,":[125],"prior":[132,136],"module,":[133],"integrates":[135],"masks":[137],"segmentation":[140],"process":[141],"guide":[143],"model\u2019s":[145],"focus":[146],"target":[149],"region,":[150],"further":[151],"enhancing":[152],"representation.":[155],"Experiments":[156],"demonstrate":[157],"that":[158],"our":[159],"proposed":[160],"achieves":[162],"excellent":[163],"results":[164],"two":[166],"common":[167],"datasets,":[168],"PASCAL-5<sup":[169],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[170,174],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">i</sup>":[171],"COCO-20<sup":[173],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">i</sup>.":[175]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
