{"id":"https://openalex.org/W7134836508","doi":"https://doi.org/10.48550/arxiv.2603.07307","title":"StructSAM: Structure- and Spectrum-Preserving Token Merging for Segment Anything Models","display_name":"StructSAM: Structure- and Spectrum-Preserving Token Merging for Segment Anything Models","publication_year":2026,"publication_date":"2026-03-07","ids":{"openalex":"https://openalex.org/W7134836508","doi":"https://doi.org/10.48550/arxiv.2603.07307"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.07307","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050539353","display_name":"Duy M. H. Nguyen","orcid":"https://orcid.org/0009-0008-6007-7786"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Duy M. H.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128663604","display_name":"Tuan A. Tran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tran, Tuan A.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128674265","display_name":"Duong Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Duong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128668243","display_name":"Siwei Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Siwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128669081","display_name":"Trung Q. Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Trung Q.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054010114","display_name":"Mai T. N. Truong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Truong, Mai T. N.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076387697","display_name":"Daniel Palenicek","orcid":"https://orcid.org/0000-0002-8292-1318"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Palenicek, Daniel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128686351","display_name":"An T. Le","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Le, An T.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037664911","display_name":"Michael Barz","orcid":"https://orcid.org/0000-0001-6730-2466"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Barz, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124122659","display_name":"TrungTin Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, TrungTin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061282536","display_name":"Tuan Dam","orcid":"https://orcid.org/0000-0001-7422-139X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dam, Tuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128677330","display_name":"Ngan Le","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Le, Ngan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128644860","display_name":"Minh Nhat Vu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vu, Minh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128633115","display_name":"Khoa Doan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Doan, Khoa","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128683143","display_name":"Vien Ngo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ngo, Vien","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128636186","display_name":"Pengtao Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Pengtao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128646745","display_name":"James Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, James","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069432042","display_name":"Daniel Sonntag","orcid":"https://orcid.org/0000-0002-8857-8709"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sonntag, Daniel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128650024","display_name":"Jan Peters","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peters, Jan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5031719069","display_name":"Mathias Niepert","orcid":"https://orcid.org/0000-0002-8401-3751"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Niepert, Mathias","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":20,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.2849999964237213,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.2849999964237213,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.19380000233650208,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.1860000044107437,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6391000151634216},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5899999737739563},{"id":"https://openalex.org/keywords/bounded-function","display_name":"Bounded function","score":0.4699000120162964},{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.4271000027656555},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.4108999967575073},{"id":"https://openalex.org/keywords/laplacian-matrix","display_name":"Laplacian matrix","score":0.36579999327659607},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.35040000081062317},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.34869998693466187}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7128999829292297},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6391000151634216},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5899999737739563},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4763000011444092},{"id":"https://openalex.org/C34388435","wikidata":"https://www.wikidata.org/wiki/Q2267362","display_name":"Bounded function","level":2,"score":0.4699000120162964},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.4271000027656555},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.4108999967575073},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.40939998626708984},{"id":"https://openalex.org/C115178988","wikidata":"https://www.wikidata.org/wiki/Q772067","display_name":"Laplacian matrix","level":3,"score":0.36579999327659607},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35199999809265137},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.35040000081062317},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.34869998693466187},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.33629998564720154},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3190000057220459},{"id":"https://openalex.org/C2988416141","wikidata":"https://www.wikidata.org/wiki/Q6031139","display_name":"Information loss","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C165700671","wikidata":"https://www.wikidata.org/wiki/Q203484","display_name":"Laplace operator","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.27799999713897705},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C129844170","wikidata":"https://www.wikidata.org/wiki/Q41299","display_name":"Quadratic equation","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C34146451","wikidata":"https://www.wikidata.org/wiki/Q5048094","display_name":"Cascade","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.07307","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.07307","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.07307","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.07307","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.620119035243988,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"token":[1,131],"merging":[2,144],"techniques":[3],"for":[4,53],"Vision":[5],"Transformers":[6],"(ViTs)":[7],"provide":[8,135],"substantial":[9],"speedups":[10],"by":[11,18,166],"reducing":[12],"the":[13,28,188],"number":[14],"of":[15],"tokens":[16,122],"processed":[17],"self-attention,":[19],"often":[20],"without":[21],"retraining.":[22],"However,":[23],"their":[24],"direct":[25],"application":[26],"to":[27,98,114,151,169],"Segment":[29],"Anything":[30],"Model":[31],"(SAM)":[32],"family":[33],"is":[34],"nontrivial:":[35],"SAM's":[36],"image":[37],"encoder":[38,164],"mixes":[39],"windowed":[40],"and":[41,44,65,73,82,117,120,159,185],"global":[42],"attention,":[43],"its":[45],"mask":[46],"decoder":[47],"relies":[48],"on":[49,63],"dense,":[50],"prompt-conditioned":[51],"features":[52],"precise":[54],"boundary":[55,116],"prediction.":[56],"We":[57,90,133],"systematically":[58],"evaluate":[59],"representative":[60],"token-merging":[61],"methods":[62],"SAM":[64,67],"Medical":[66],"in":[68,177],"a":[69,93,102,136],"strict":[70],"off-the-shelf":[71],"setting,":[72],"find":[74],"that":[75,142],"existing":[76],"destination-selection":[77],"heuristics":[78],"can":[79],"erode":[80],"boundaries":[81],"leak":[83],"prompt":[84,118],"information":[85],"as":[86],"merge":[87],"rates":[88],"increase.":[89],"propose":[91],"\\textbf{StructSAM},":[92],"resolution-preserving":[94],"merge-unmerge":[95],"framework":[96],"tailored":[97],"SAM.":[99],"StructSAM":[100,162],"computes":[101],"lightweight":[103],"token-energy":[104],"score":[105],"from":[106],"first-order":[107],"feature":[108],"gradients,":[109],"uses":[110],"grid-based":[111],"flatness":[112],"screening":[113],"protect":[115],"regions,":[119],"merges":[121],"within":[123],"flat":[124],"areas":[125],"toward":[126],"low-energy":[127],"destinations":[128],"with":[129,171,174],"explicit":[130],"recovery.":[132],"further":[134],"spectral":[137,148],"graph":[138],"coarsening":[139],"view":[140],"showing":[141],"score-guided":[143],"yields":[145],"bounded":[146],"Laplacian":[147],"distortion":[149],"compared":[150],"random":[152],"or":[153],"window-restricted":[154],"baselines.":[155],"Across":[156],"eight":[157],"natural":[158],"medical":[160],"benchmarks,":[161],"reduces":[163],"FLOPs":[165],"25-30\\%":[167],"(up":[168],"40\\%+":[170],"prompt-aware":[172],"merging)":[173],"minor":[175],"drops":[176],"mIoU/Dice,":[178],"consistently":[179],"outperforming":[180],"ToMe,":[181],"PiToMe,":[182],"ToMeSD,":[183],"VidToMe,":[184],"ALGM":[186],"at":[187],"same":[189],"compute.":[190]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-11T00:00:00"}
