{"id":"https://openalex.org/W4415591165","doi":"https://doi.org/10.1007/s44230-025-00115-4","title":"Prompt-SAM: A Vision-Language and SAM based Hybrid Framework for Prompt-Augmented Zero-Shot Segmentation","display_name":"Prompt-SAM: A Vision-Language and SAM based Hybrid Framework for Prompt-Augmented Zero-Shot Segmentation","publication_year":2025,"publication_date":"2025-10-27","ids":{"openalex":"https://openalex.org/W4415591165","doi":"https://doi.org/10.1007/s44230-025-00115-4"},"language":"en","primary_location":{"id":"doi:10.1007/s44230-025-00115-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44230-025-00115-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44230-025-00115-4.pdf","source":{"id":"https://openalex.org/S4210207486","display_name":"Human-Centric Intelligent Systems","issn_l":"2667-1336","issn":["2667-1336"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Human-Centric Intelligent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://link.springer.com/content/pdf/10.1007/s44230-025-00115-4.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048180575","display_name":"Uma Gurav","orcid":"https://orcid.org/0000-0002-8816-6333"},"institutions":[{"id":"https://openalex.org/I4210097370","display_name":"Sanjay Ghodawat University","ror":"https://ror.org/00tk46k54","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210097370"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Uma Gurav","raw_affiliation_strings":["Department of Computer Science Engineering (AIML), KIT\u2019s College of Engineering, Kolhapur, 416234, Maharashtra, India","Department of Computer Science Engineering (AIML), KIT's College of Engineering, Kolhapur, 416234, Maharashtra, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science Engineering (AIML), KIT\u2019s College of Engineering, Kolhapur, 416234, Maharashtra, India","institution_ids":["https://openalex.org/I4210097370"]},{"raw_affiliation_string":"Department of Computer Science Engineering (AIML), KIT's College of Engineering, Kolhapur, 416234, Maharashtra, India","institution_ids":["https://openalex.org/I4210097370"]}]},{"author_position":"last","author":{"id":null,"display_name":"Sanket Jadhav","orcid":null},"institutions":[{"id":"https://openalex.org/I4210097370","display_name":"Sanjay Ghodawat University","ror":"https://ror.org/00tk46k54","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210097370"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sanket Jadhav","raw_affiliation_strings":["Department of Computer Science Engineering (AIML), KIT\u2019s College of Engineering, Kolhapur, 416234, Maharashtra, India","Department of Computer Science Engineering (AIML), KIT's College of Engineering, Kolhapur, 416234, Maharashtra, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science Engineering (AIML), KIT\u2019s College of Engineering, Kolhapur, 416234, Maharashtra, India","institution_ids":["https://openalex.org/I4210097370"]},{"raw_affiliation_string":"Department of Computer Science Engineering (AIML), KIT's College of Engineering, Kolhapur, 416234, Maharashtra, India","institution_ids":["https://openalex.org/I4210097370"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5048180575"],"corresponding_institution_ids":["https://openalex.org/I4210097370"],"apc_list":null,"apc_paid":null,"fwci":1.1783,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.84086008,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"5","issue":"4","first_page":"431","last_page":"449"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10522","display_name":"Medical Imaging Techniques and Applications","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.8248000144958496},{"id":"https://openalex.org/keywords/dice","display_name":"Dice","score":0.6026999950408936},{"id":"https://openalex.org/keywords/pascal","display_name":"Pascal (unit)","score":0.5877000093460083},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5160999894142151},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.49149999022483826},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4869999885559082},{"id":"https://openalex.org/keywords/scale-space-segmentation","display_name":"Scale-space segmentation","score":0.48579999804496765},{"id":"https://openalex.org/keywords/segmentation-based-object-categorization","display_name":"Segmentation-based object categorization","score":0.4683000147342682}],"concepts":[{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.8248000144958496},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7950999736785889},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7811999917030334},{"id":"https://openalex.org/C22029948","wikidata":"https://www.wikidata.org/wiki/Q45089","display_name":"Dice","level":2,"score":0.6026999950408936},{"id":"https://openalex.org/C75608658","wikidata":"https://www.wikidata.org/wiki/Q44395","display_name":"Pascal (unit)","level":2,"score":0.5877000093460083},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5160999894142151},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.49149999022483826},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4869999885559082},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.48579999804496765},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.484499990940094},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.4683000147342682},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.43059998750686646},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.39340001344680786},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.37139999866485596},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.36640000343322754},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.3292999863624573},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3188999891281128},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.31029999256134033},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.30979999899864197},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.26600000262260437},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s44230-025-00115-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44230-025-00115-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44230-025-00115-4.pdf","source":{"id":"https://openalex.org/S4210207486","display_name":"Human-Centric Intelligent Systems","issn_l":"2667-1336","issn":["2667-1336"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Human-Centric Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:12a9166f0b874fecbf761c712aafb39a","is_oa":true,"landing_page_url":"https://doaj.org/article/12a9166f0b874fecbf761c712aafb39a","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Human-Centric Intelligent Systems, Vol 5, Iss 4, Pp 431-449 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s44230-025-00115-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44230-025-00115-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44230-025-00115-4.pdf","source":{"id":"https://openalex.org/S4210207486","display_name":"Human-Centric Intelligent Systems","issn_l":"2667-1336","issn":["2667-1336"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319965","host_organization_name":"Springer Nature","host_organization_lineage":["https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Human-Centric Intelligent Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320736","display_name":"All India Council for Technical Education","ror":"https://ror.org/00xcd7y72"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415591165.pdf","grobid_xml":"https://content.openalex.org/works/W4415591165.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1901129140","https://openalex.org/W2031489346","https://openalex.org/W2340897893","https://openalex.org/W2412782625","https://openalex.org/W2507296351","https://openalex.org/W2884436604","https://openalex.org/W2963881378","https://openalex.org/W3159481202","https://openalex.org/W4226058394","https://openalex.org/W4312420092","https://openalex.org/W4312912313","https://openalex.org/W4312980231","https://openalex.org/W4313156423","https://openalex.org/W4386071643","https://openalex.org/W4386075882","https://openalex.org/W4386075997","https://openalex.org/W4390874575","https://openalex.org/W4402916510","https://openalex.org/W4404690627","https://openalex.org/W4413146696"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"Recent":[1],"advancements":[2],"in":[3,170,198],"deep":[4],"learning":[5,24],"have":[6],"greatly":[7],"improved":[8],"computer":[9],"vision":[10],"tasks":[11],"like":[12],"object":[13],"detection,":[14],"image":[15],"classification,":[16],"and":[17,36,64,92,131,143,164,178,192,212],"segmentation.":[18],"Despite":[19],"these":[20],"successes,":[21],"traditional":[22],"supervised":[23],"methods":[25],"still":[26],"depend":[27],"on":[28,136,222,229],"large":[29],"annotated":[30,223],"datasets,":[31],"which":[32],"are":[33,95],"often":[34],"expensive":[35],"time":[37],"consuming":[38],"to":[39,82,100],"create.":[40],"To":[41],"overcome":[42],"this":[43,106,188],"limitation,":[44],"we":[45],"present":[46],"a":[47,122,159,165,210],"zero-shot":[48,75,216],"segmentation":[49,85,103,113],"framework":[50,208],"that":[51,187],"combines":[52],"the":[53,65,74,109,148,151,155,171,205,220],"strengths":[54],"of":[55,78,125,150,162,168],"CLIP":[56,79],"(Contrastive":[57],"Language-Image":[58],"Pretraining),":[59],"its":[60],"segmentation-focused":[61],"variant":[62],"CLIPSeg,":[63],"Segment":[66],"Anything":[67],"Model":[68],"(SAM).":[69],"This":[70],"approach":[71],"first":[72],"uses":[73],"classification":[76],"ability":[77],"or":[80,200],"CLIPSeg":[81,177],"produce":[83],"initial":[84],"cues.":[86],"These":[87],"cues,":[88],"such":[89,138],"as":[90,139],"point":[91],"box":[93],"prompts,":[94],"then":[96],"refined":[97],"by":[98],"SAM":[99,179],"generate":[101],"accurate":[102],"masks.":[104],"Using":[105],"prompt-based":[107],"strategy,":[108],"system":[110],"can":[111],"perform":[112],"without":[114],"requiring":[115],"labeled":[116],"data,":[117],"making":[118],"it":[119],"suitable":[120],"for":[121,215],"wide":[123],"range":[124],"domains,":[126],"including":[127],"both":[128,176],"natural":[129],"scenes":[130],"medical":[132],"imaging.":[133],"Our":[134],"experiments":[135],"benchmarks":[137],"MS-COCO,":[140],"Pascal":[141],"VOC,":[142],"chest":[144,172],"X-ray":[145,173],"datasets":[146,224],"highlight":[147],"effectiveness":[149],"method.":[152],"In":[153,203],"particular,":[154],"CLIPSeg+SAM":[156],"combination":[157],"achieves":[158],"mean":[160],"IoU":[161],"0.793":[163],"Dice":[166],"score":[167],"0.873":[169],"dataset,":[174],"outperforming":[175],"when":[180],"used":[181],"alone.":[182],"Visual":[183],"results":[184],"also":[185],"show":[186],"method":[189],"produces":[190],"clearer":[191],"more":[193],"precise":[194],"mask":[195],"boundaries,":[196],"even":[197],"challenging":[199],"cluttered":[201],"environments.":[202],"summary,":[204],"proposed":[206],"training-free":[207],"offers":[209],"scalable":[211],"generalizable":[213],"solution":[214],"segmentation,":[217],"significantly":[218],"reducing":[219],"reliance":[221],"while":[225],"delivering":[226],"strong":[227],"performance":[228],"unseen":[230],"classes.":[231]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-28T00:00:00"}
