{"id":"https://openalex.org/W4417284346","doi":"https://doi.org/10.1109/tcsvt.2025.3643649","title":"SSP-SAM: SAM With Semantic-Spatial Prompt for Referring Expression Segmentation","display_name":"SSP-SAM: SAM With Semantic-Spatial Prompt for Referring Expression Segmentation","publication_year":2025,"publication_date":"2025-12-12","ids":{"openalex":"https://openalex.org/W4417284346","doi":"https://doi.org/10.1109/tcsvt.2025.3643649"},"language":null,"primary_location":{"id":"doi:10.1109/tcsvt.2025.3643649","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3643649","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101459385","display_name":"Wei Tang","orcid":"https://orcid.org/0000-0003-3414-2421"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Tang","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","School of Computer Science and Engineering, Nanjing University of Science and Technology, No. 200 Xiaolingwei Road, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-3414-2421","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, No. 200 Xiaolingwei Road, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101944061","display_name":"Xuejing Liu","orcid":"https://orcid.org/0000-0001-9612-3707"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuejing Liu","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9612-3707","affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101812029","display_name":"Yanpeng Sun","orcid":"https://orcid.org/0000-0001-6249-5596"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yanpeng Sun","raw_affiliation_strings":["NExT++ Laboratory, School of Computing, National University of Singapore, Queenstown, Singapore","School of Computing, NExT++ Lab, National University of Singapore, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0001-6249-5596","affiliations":[{"raw_affiliation_string":"NExT++ Laboratory, School of Computing, National University of Singapore, Queenstown, Singapore","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"School of Computing, NExT++ Lab, National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017096005","display_name":"Zechao Li","orcid":"https://orcid.org/0000-0002-5341-5985"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zechao Li","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","School of Computer Science and Engineering, Nanjing University of Science and Technology, No. 200 Xiaolingwei Road, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-5341-5985","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, No. 200 Xiaolingwei Road, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.357719,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"5","first_page":"6374","last_page":"6389"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9534000158309937,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9534000158309937,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.020099999383091927,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.005499999970197678,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8555999994277954},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.8174999952316284},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.551800012588501},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.546999990940094},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5218999981880188},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.5213000178337097},{"id":"https://openalex.org/keywords/referent","display_name":"Referent","score":0.447299987077713}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8555999994277954},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.8174999952316284},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7621999979019165},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6747000217437744},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.551800012588501},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.546999990940094},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5218999981880188},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.5213000178337097},{"id":"https://openalex.org/C2777096784","wikidata":"https://www.wikidata.org/wiki/Q3826351","display_name":"Referent","level":2,"score":0.447299987077713},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.384799987077713},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.36890000104904175},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.36149999499320984},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34389999508857727},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.31949999928474426},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.31119999289512634},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.29350000619888306},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.2549999952316284},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3643649","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3643649","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2035907670","display_name":null,"funder_award_id":"BK20243018","funder_id":"https://openalex.org/F4320334982","funder_display_name":"Basic Research Program of Jiangsu Province"},{"id":"https://openalex.org/G2531279109","display_name":null,"funder_award_id":"62425603","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334982","display_name":"Basic Research Program of Jiangsu Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,182],"Segment":[1],"Anything":[2],"Model":[3],"(SAM)":[4],"excels":[5],"at":[6,158],"general":[7],"image":[8],"segmentation":[9,40,149],"but":[10],"has":[11],"limited":[12],"ability":[13],"to":[14,94,114,177],"understand":[15],"natural":[16],"language,":[17],"which":[18,62],"restricts":[19],"its":[20],"direct":[21],"application":[22],"in":[23,88,173],"Referring":[24],"Expression":[25],"Segmentation":[26],"(RES).":[27],"Toward":[28],"this":[29,123],"end,":[30],"we":[31,50],"propose":[32],"SSP-SAM,":[33],"a":[34,44],"framework":[35],"that":[36,91],"fully":[37],"utilizes":[38],"SAM\u2019s":[39],"capabilities":[41],"by":[42,99],"integrating":[43],"Semantic-Spatial":[45],"Prompt":[46],"(SSP)":[47],"encoder.":[48],"Specifically,":[49],"incorporate":[51],"both":[52],"visual":[53,68],"and":[54,70,136,184],"linguistic":[55,75],"attention":[56],"adapters":[57],"into":[58],"the":[59,67,74,80,84,110,140,167],"SSP":[60],"encoder,":[61],"highlight":[63],"salient":[64],"objects":[65],"within":[66,73],"features":[69],"discriminative":[71],"phrases":[72],"features.":[76],"This":[77],"design":[78],"enhances":[79],"referent":[81,111],"representation":[82],"for":[83,105],"prompt":[85],"generator,":[86],"resulting":[87],"high-quality":[89],"SSPs":[90],"enable":[92],"SAM":[93],"generate":[95],"precise":[96],"masks":[97,150],"guided":[98],"language.":[100],"Although":[101],"not":[102],"specifically":[103],"designed":[104],"Generalized":[106],"RES":[107,135,180],"(GRES),":[108],"where":[109],"may":[112],"correspond":[113],"zero,":[115],"one,":[116],"or":[117],"multiple":[118],"objects,":[119],"SSP-SAM":[120],"naturally":[121],"supports":[122],"more":[124],"flexible":[125],"setting":[126],"without":[127],"additional":[128],"modifications.":[129],"Extensive":[130],"experiments":[131],"on":[132,166],"widely":[133],"used":[134],"GRES":[137],"benchmarks":[138],"confirm":[139],"superiority":[141],"of":[142,151],"our":[143,146],"method.":[144],"Notably,":[145],"approach":[147],"generates":[148],"high":[152],"quality,":[153],"achieving":[154],"strong":[155],"precision":[156],"even":[157],"strict":[159],"thresholds":[160],"such":[161],"as":[162],"Pr@0.9.":[163],"Further":[164],"evaluation":[165],"PhraseCut":[168],"dataset":[169],"demonstrates":[170],"improved":[171],"performance":[172],"open-vocabulary":[174],"scenarios":[175],"compared":[176],"existing":[178],"state-of-the-art":[179],"methods.":[181],"code":[183],"checkpoints":[185],"are":[186],"available":[187],"at:":[188],"https://github.com/WayneTomas/SSP-SAM.":[189]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-12T00:00:00"}
