{"id":"https://openalex.org/W7164862650","doi":"https://doi.org/10.1145/3805622.3810586","title":"SAM3-LiteText: An Anatomical Study of the SAM3 Text Encoder for Efficient Vision-Language Segmentation","display_name":"SAM3-LiteText: An Anatomical Study of the SAM3 Text Encoder for Efficient Vision-Language Segmentation","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164862650","doi":"https://doi.org/10.1145/3805622.3810586"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810586","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810586","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810586","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029631335","display_name":"Chengxi Zeng","orcid":null},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chengxi Zeng","raw_affiliation_strings":["University of Bristol, Bristol, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0002-0872-2054","affiliations":[{"raw_affiliation_string":"University of Bristol, Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006561266","display_name":"Yuxuan Jiang","orcid":"https://orcid.org/0000-0002-2279-6199"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yuxuan Jiang","raw_affiliation_strings":["University of Bristol, Bristol, United Kingdom"],"raw_orcid":"https://orcid.org/0009-0009-6102-5133","affiliations":[{"raw_affiliation_string":"University of Bristol, Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048549975","display_name":"Ge Gao","orcid":"https://orcid.org/0000-0001-6470-8815"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ge Gao","raw_affiliation_strings":["University of Bristol, Bristol, United Kingdom"],"raw_orcid":"https://orcid.org/0009-0007-4202-9791","affiliations":[{"raw_affiliation_string":"University of Bristol, Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100328272","display_name":"Shuai Wang","orcid":"https://orcid.org/0000-0002-1595-3619"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Shuai Wang","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-1595-3619","affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040032244","display_name":"Duolikun Danier","orcid":"https://orcid.org/0000-0002-9320-7099"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Duolikun Danier","raw_affiliation_strings":["University of Edinburgh, Edinburgh, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0002-9320-7099","affiliations":[{"raw_affiliation_string":"University of Edinburgh, Edinburgh, United Kingdom","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124128501","display_name":"Bin Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Bin Zhu","raw_affiliation_strings":["Singapore Management University, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-9213-2611","affiliations":[{"raw_affiliation_string":"Singapore Management University, Singapore, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075331928","display_name":"Stevan Rudinac","orcid":"https://orcid.org/0000-0003-1904-8736"},"institutions":[{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Stevan Rudinac","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"raw_orcid":"https://orcid.org/0000-0003-1904-8736","affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048009053","display_name":"David Bull","orcid":"https://orcid.org/0000-0001-7634-190X"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"David Bull","raw_affiliation_strings":["University of Bristol, Bristol, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0001-7634-190X","affiliations":[{"raw_affiliation_string":"University of Bristol, Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100403389","display_name":"Fan Zhang","orcid":"https://orcid.org/0000-0001-6623-9936"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Fan Zhang","raw_affiliation_strings":["University of Bristol, Bristol, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0001-6623-9936","affiliations":[{"raw_affiliation_string":"University of Bristol, Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.94096506,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1147","last_page":"1156"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7882000207901001,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7882000207901001,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.07900000363588333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.040800001472234726,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.8691999912261963},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7594000101089478},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.6177999973297119},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5425999760627747},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5102999806404114},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.5091000199317932},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.44350001215934753}],"concepts":[{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.8691999912261963},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7928000092506409},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7594000101089478},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.6177999973297119},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6159999966621399},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5425999760627747},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5102999806404114},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.5091000199317932},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.44589999318122864},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.44350001215934753},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4424999952316284},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40459999442100525},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.4011000096797943},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.32330000400543213},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3156999945640564},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2906000018119812},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.28220000863075256}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810586","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810586","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810586","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810586","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6400551795959473,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2252355370","https://openalex.org/W2302548814","https://openalex.org/W2489434015","https://openalex.org/W2603203130","https://openalex.org/W2754478492","https://openalex.org/W2760390332","https://openalex.org/W2948672349","https://openalex.org/W2963109634","https://openalex.org/W3034457371","https://openalex.org/W3096609285","https://openalex.org/W3105966348","https://openalex.org/W3119686997","https://openalex.org/W3159619744","https://openalex.org/W4286904999","https://openalex.org/W4312473433","https://openalex.org/W4312912313","https://openalex.org/W4386075819","https://openalex.org/W4390874575","https://openalex.org/W4391109864","https://openalex.org/W4402727234","https://openalex.org/W4402727760","https://openalex.org/W4402753581","https://openalex.org/W4402776403","https://openalex.org/W4402915908","https://openalex.org/W4413144929","https://openalex.org/W4413277889"],"related_works":[],"abstract_inverted_index":{"Vision-language":[0],"segmentation":[1,25,131,151],"models":[2],"such":[3],"as":[4],"SAM3":[5,111],"enable":[6],"flexible,":[7],"prompt-driven":[8],"visual":[9],"grounding,":[10],"but":[11],"inherit":[12],"large,":[13],"general-purpose":[14],"text":[15,38,57,85,104,112,137],"encoders":[16],"originally":[17],"designed":[18],"for":[19],"open-ended":[20],"language":[21],"understanding.":[22],"In":[23,47],"practice,":[24],"prompts":[26,65],"are":[27,77],"short,":[28],"structured,":[29],"and":[30,41,44,84,129],"semantically":[31],"constrained,":[32],"leading":[33],"to":[34,142,154],"substantial":[35],"over-provisioning":[36],"in":[37,59],"encoder":[39,113,138],"capacity":[40],"persistent":[42],"computational":[43],"memory":[45,147],"overhead.":[46],"this":[48],"paper,":[49],"we":[50,99],"perform":[51],"a":[52,89,102,115],"large-scale":[53],"anatomical":[54],"analysis":[55,70],"of":[56],"prompting":[58],"vision\u2013language":[60],"segmentation,":[61],"covering":[62],"404,796":[63],"real":[64],"across":[66],"multiple":[67],"benchmarks.":[68],"Our":[69],"reveals":[71],"severe":[72],"redundancy:":[73],"most":[74],"context":[75],"windows":[76],"underutilized,":[78],"vocabulary":[79],"usage":[80],"is":[81,120],"highly":[82],"sparse,":[83],"embeddings":[86],"lie":[87],"on":[88,127],"low-dimensional":[90],"manifold":[91],"despite":[92],"high-dimensional":[93],"representations.":[94],"Motivated":[95],"by":[96,122,140],"these":[97],"findings,":[98],"propose":[100],"SAM3-LiteText,":[101],"lightweight":[103],"encoding":[105],"framework":[106],"that":[107,119,134],"replaces":[108],"the":[109,155],"original":[110,156],"with":[114],"compact":[116],"MobileCLIP":[117],"student":[118],"optimized":[121],"knowledge":[123],"distillation.":[124],"Extensive":[125],"experiments":[126],"image":[128],"video":[130],"benchmarks":[132],"show":[133],"SAM3-LiteText":[135],"reduces":[136],"parameters":[139],"up":[141],"88%,":[143],"substantially":[144],"reducing":[145],"static":[146],"footprint,":[148],"while":[149],"maintaining":[150],"performance":[152],"comparable":[153],"model.":[157],"Code:":[158],"https://github.com/SimonZeng7108/efficientsam3/tree/sam3_litetext.":[159]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
