{"id":"https://openalex.org/W4413158376","doi":"https://doi.org/10.1109/cvpr52734.2025.01886","title":"Towards Efficient Foundation Model for Zero-shot Amodal Segmentation","display_name":"Towards Efficient Foundation Model for Zero-shot Amodal Segmentation","publication_year":2025,"publication_date":"2025-06-10","ids":{"openalex":"https://openalex.org/W4413158376","doi":"https://doi.org/10.1109/cvpr52734.2025.01886"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52734.2025.01886","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.01886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089956129","display_name":"Zhaochen Liu","orcid":"https://orcid.org/0009-0004-8024-9590"},"institutions":[{"id":"https://openalex.org/I173029219","display_name":"Multimedia University","ror":"https://ror.org/04zrbnc33","country_code":"MY","type":"education","lineage":["https://openalex.org/I173029219"]}],"countries":["MY"],"is_corresponding":true,"raw_author_name":"Zhaochen Liu","raw_affiliation_strings":["Peking University,National Engineering Research Center of Visual Technology, National Key Laboratory for Multimedia Information Processing, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,National Engineering Research Center of Visual Technology, National Key Laboratory for Multimedia Information Processing, School of Computer Science","institution_ids":["https://openalex.org/I173029219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085610502","display_name":"Limeng Qiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Limeng Qiao","raw_affiliation_strings":["Meituan Inc"],"affiliations":[{"raw_affiliation_string":"Meituan Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101512474","display_name":"Xiangxiang Chu","orcid":"https://orcid.org/0000-0003-2548-0605"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiangxiang Chu","raw_affiliation_strings":["Meituan Inc"],"affiliations":[{"raw_affiliation_string":"Meituan Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026035207","display_name":"Lin Ma","orcid":"https://orcid.org/0000-0002-8172-5788"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin Ma","raw_affiliation_strings":["Meituan Inc"],"affiliations":[{"raw_affiliation_string":"Meituan Inc","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101606698","display_name":"Tingting Jiang","orcid":"https://orcid.org/0000-0002-5372-0656"},"institutions":[{"id":"https://openalex.org/I173029219","display_name":"Multimedia University","ror":"https://ror.org/04zrbnc33","country_code":"MY","type":"education","lineage":["https://openalex.org/I173029219"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Tingting Jiang","raw_affiliation_strings":["Peking University,National Engineering Research Center of Visual Technology, National Key Laboratory for Multimedia Information Processing, School of Computer Science"],"affiliations":[{"raw_affiliation_string":"Peking University,National Engineering Research Center of Visual Technology, National Key Laboratory for Multimedia Information Processing, School of Computer Science","institution_ids":["https://openalex.org/I173029219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5089956129"],"corresponding_institution_ids":["https://openalex.org/I173029219"],"apc_list":null,"apc_paid":null,"fwci":2.6381,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.91287339,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"20254","last_page":"20264"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9596999883651733,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9596999883651733,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5757023692131042},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.5690225958824158},{"id":"https://openalex.org/keywords/amodal-perception","display_name":"Amodal perception","score":0.5646140575408936},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5457044839859009},{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.539506196975708},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41731739044189453},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3437626361846924},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.07755604386329651},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.07149490714073181}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5757023692131042},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.5690225958824158},{"id":"https://openalex.org/C174478892","wikidata":"https://www.wikidata.org/wiki/Q4747455","display_name":"Amodal perception","level":3,"score":0.5646140575408936},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5457044839859009},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.539506196975708},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41731739044189453},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3437626361846924},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.07755604386329651},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.07149490714073181},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52734.2025.01886","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.01886","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W2150066425","https://openalex.org/W2194775991","https://openalex.org/W2344939607","https://openalex.org/W2947482671","https://openalex.org/W2952122856","https://openalex.org/W2963243172","https://openalex.org/W2963539956","https://openalex.org/W2963660453","https://openalex.org/W2964325922","https://openalex.org/W2967977974","https://openalex.org/W3003757902","https://openalex.org/W3011334834","https://openalex.org/W3035637413","https://openalex.org/W3107828565","https://openalex.org/W3167260844","https://openalex.org/W3173230514","https://openalex.org/W3177323791","https://openalex.org/W3180993716","https://openalex.org/W3201721053","https://openalex.org/W4205991051","https://openalex.org/W4285813072","https://openalex.org/W4289462076","https://openalex.org/W4294811430","https://openalex.org/W4312385376","https://openalex.org/W4312498343","https://openalex.org/W4312651322","https://openalex.org/W4321460497","https://openalex.org/W4372267075","https://openalex.org/W4382464606","https://openalex.org/W4385245566","https://openalex.org/W4385572883","https://openalex.org/W4389524317","https://openalex.org/W4390872659","https://openalex.org/W4390873795","https://openalex.org/W4390874575","https://openalex.org/W4393154526","https://openalex.org/W4394593151","https://openalex.org/W4402702973","https://openalex.org/W4402715933","https://openalex.org/W4402716421","https://openalex.org/W4402754134","https://openalex.org/W4404612908","https://openalex.org/W4406208390"],"related_works":["https://openalex.org/W3158435931","https://openalex.org/W1589158839","https://openalex.org/W2048200892","https://openalex.org/W4284674805","https://openalex.org/W4321460497","https://openalex.org/W2153903859","https://openalex.org/W4294017904","https://openalex.org/W4387775854","https://openalex.org/W2086050082","https://openalex.org/W2951289157"],"abstract_inverted_index":{"Aiming":[0],"to":[1,21,76,105,113,127,150,153,175],"predict":[2],"the":[3,23,30,53,59,79,84,90,110,115,133,155,165,169],"complete":[4],"shape":[5],"of":[6,87,117,130,171,191],"partially":[7],"occluded":[8],"objects,":[9],"amodal":[10,62,103,118],"segmentation":[11,63,104],"is":[12,74,96,137,173],"an":[13],"important":[14],"capacity":[15],"towards":[16],"visual":[17],"intelligence.":[18],"In":[19,89,109,132],"order":[20],"promote":[22],"practicability,":[24],"zero-shot":[25,177],"foundation":[26,64],"model":[27,185],"competent":[28],"for":[29],"open":[31],"world":[32],"gains":[33],"growing":[34],"attention":[35],"in":[36,44,189],"this":[37,50],"field.":[38],"Nevertheless,":[39],"prior":[40,55],"models":[41],"exhibit":[42],"deficiencies":[43],"efficiency":[45],"and":[46,82,102,164,178,193],"stability.":[47],"To":[48],"address":[49],"problem,":[51],"utilizing":[52],"implicit":[54],"knowledge,":[56],"we":[57,121],"propose":[58],"first":[60],"SAM-based":[61],"model,":[65],"SAMBA.":[66],"Methodologically,":[67],"a":[68,93,123,140],"novel":[69],"framework":[70],"with":[71,139],"multilevel":[72],"facilitation":[73],"designed":[75],"better":[77],"adapt":[78],"task":[80],"characteristics":[81],"unleash":[83],"potential":[85],"capabilities":[86],"SAM.":[88],"modality":[91],"level,":[92,112,135],"separation-to-fusion":[94],"structure":[95],"employed":[97],"that":[98,168],"jointly":[99],"learns":[100],"modal":[101],"enhance":[106],"mutual":[107],"coordination.":[108],"instance":[111],"ease":[114],"complexity":[116],"feature":[119],"extraction,":[120],"introduce":[122],"principal":[124],"focusing":[125],"mechanism":[126],"indicate":[128],"objects":[129],"interest.":[131],"pixel":[134],"mixture-of-experts":[136],"incorporated":[138],"specialized":[141],"distribution":[142],"loss,":[143],"by":[144],"which":[145],"distinct":[146],"occlusion":[147],"rates":[148],"correspond":[149],"different":[151],"experts":[152],"improve":[154],"accuracy.":[156],"Experiments":[157],"are":[158],"conducted":[159],"on":[160],"several":[161],"eminent":[162],"datasets,":[163],"results":[166],"show":[167],"performance":[170],"SAMBA":[172],"superior":[174],"existing":[176],"even":[179],"supervised":[180],"approaches.":[181],"Furthermore,":[182],"our":[183],"proposed":[184],"has":[186],"notable":[187],"advantages":[188],"terms":[190],"speed":[192],"size.":[194]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
