{"id":"https://openalex.org/W7158370857","doi":"https://doi.org/10.48550/arxiv.2604.24876","title":"ESICA: A Scalable Framework for Text-Guided 3D Medical Image Segmentation","display_name":"ESICA: A Scalable Framework for Text-Guided 3D Medical Image Segmentation","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7158370857","doi":"https://doi.org/10.48550/arxiv.2604.24876"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.24876","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24876","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.24876","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078248262","display_name":"Yu Xin","orcid":"https://orcid.org/0000-0002-0696-4658"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049952013","display_name":"Gorkem Can Ates","orcid":"https://orcid.org/0000-0002-3424-8587"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ates, Gorkem Can","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134867032","display_name":"Jun Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Jun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134849446","display_name":"Sumin Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Sumin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134870397","display_name":"Ying Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035188223","display_name":"Kaleb E. Smith","orcid":"https://orcid.org/0000-0003-1532-5919"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Smith, Kaleb E","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013691634","display_name":"Kuang Gong","orcid":"https://orcid.org/0000-0002-2669-2610"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gong, Kuang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134853991","display_name":"Wei Shao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shao, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5216000080108643,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5216000080108643,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.09860000014305115,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.08160000294446945,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7267000079154968},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6650999784469604},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.5315999984741211},{"id":"https://openalex.org/keywords/scale-space-segmentation","display_name":"Scale-space segmentation","score":0.47999998927116394},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.46369999647140503},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.46149998903274536},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4296000003814697},{"id":"https://openalex.org/keywords/medical-imaging","display_name":"Medical imaging","score":0.4205999970436096},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.4059999883174896}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7879999876022339},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7267000079154968},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6650999784469604},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5493999719619751},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.5315999984741211},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.47999998927116394},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.46369999647140503},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.46149998903274536},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4296000003814697},{"id":"https://openalex.org/C31601959","wikidata":"https://www.wikidata.org/wiki/Q931309","display_name":"Medical imaging","level":2,"score":0.4205999970436096},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4171999990940094},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.4059999883174896},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40380001068115234},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4027999937534332},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3833000063896179},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.3601999878883362},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.31610000133514404},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.30489999055862427},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.29910001158714294},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.29170000553131104},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.287200003862381},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.26669999957084656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.24876","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24876","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.24876","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24876","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Text":[0],"guided":[1,51,193],"3D":[2],"medical":[3],"image":[4],"segmentation":[5,166,176,194],"offers":[6],"a":[7,73,86,111,131,183],"flexible":[8],"alternative":[9],"to":[10,21,65],"class":[11],"based":[12,16,89],"and":[13,41,63,75,109,119,127,158,198],"spatial":[14],"prompt":[15],"models":[17],"by":[18,141],"allowing":[19],"users":[20],"specify":[22],"regions":[23],"of":[24,136,163],"interest":[25],"directly":[26],"in":[27],"natural":[28],"language.":[29],"This":[30],"paradigm":[31],"avoids":[32],"reliance":[33],"on":[34],"predefined":[35],"label":[36],"sets,":[37],"reduces":[38],"ambiguous":[39],"outputs,":[40],"aligns":[42],"more":[43],"naturally":[44],"with":[45,102,178],"clinical":[46],"workflows.":[47],"However,":[48],"existing":[49],"text":[50,59,192],"frameworks":[52],"are":[53],"often":[54],"computationally":[55],"expensive,":[56],"exhibit":[57],"weak":[58],"volume":[60],"feature":[61],"alignment,":[62,96],"fail":[64],"capture":[66],"fine":[67,143],"anatomical":[68],"details.":[69],"We":[70],"propose":[71],"ESICA,":[72],"lightweight":[74],"scalable":[76],"framework":[77,190],"that":[78,93,116],"addresses":[79],"these":[80],"challenges":[81],"through":[82],"three":[83],"innovations:":[84],"(1)":[85],"similarity":[87],"matrix":[88],"mask":[90],"prediction":[91],"formulation":[92],"enhances":[94],"semantic":[95],"(2)":[97],"an":[98],"efficient":[99],"decomposed":[100],"decoder":[101],"adapter":[103],"modules":[104],"for":[105],"accurate":[106],"volumetric":[107],"decoding,":[108],"(3)":[110],"two":[112,132],"pass":[113],"refinement":[114],"strategy":[115],"sharpens":[117],"boundaries":[118],"resolves":[120],"uncertain":[121],"regions.":[122],"To":[123],"improve":[124],"training":[125],"stability":[126],"generalization,":[128],"ESICA":[129,160],"adopts":[130],"stage":[133],"scheme":[134],"consisting":[135],"positive":[137],"only":[138],"pretraining":[139],"followed":[140],"balanced":[142],"tuning.":[144],"On":[145],"the":[146,164,169],"CVPR":[147],"BiomedSegFM":[148],"benchmark":[149],"spanning":[150],"five":[151],"imaging":[152],"modalities":[153],"(CT,":[154],"MRI,":[155],"PET,":[156],"ultrasound,":[157],"microscopy),":[159],"achieves":[161],"state":[162],"art":[165],"accuracy,":[167],"while":[168],"compact":[170],"ESICA4":[171],"Lite":[172],"variant":[173],"attains":[174],"similar":[175],"performance":[177],"substantially":[179],"fewer":[180],"parameters,":[181],"yielding":[182],"superior":[184],"efficiency":[185],"accuracy":[186],"trade":[187],"off.":[188],"Our":[189],"advances":[191],"toward":[195],"efficient,":[196],"scalable,":[197],"clinically":[199],"deployable":[200],"systems.":[201],"Code":[202],"will":[203],"be":[204],"made":[205],"publicly":[206],"available":[207],"at":[208],"https://github.com/mirthAI/ESICA.":[209]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-30T00:00:00"}
