{"id":"https://openalex.org/W4415861743","doi":"https://doi.org/10.1109/iccv51701.2025.01927","title":"MOBIUS: Big-to-Mobile Universal Instance Segmentation via Multi-modal Bottleneck Fusion and Calibrated Decoder Pruning","display_name":"MOBIUS: Big-to-Mobile Universal Instance Segmentation via Multi-modal Bottleneck Fusion and Calibrated Decoder Pruning","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4415861743","doi":"https://doi.org/10.1109/iccv51701.2025.01927"},"language":null,"primary_location":{"id":"doi:10.1109/iccv51701.2025.01927","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01927","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.15026","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025696769","display_name":"Mattia Seg\u00f9","orcid":"https://orcid.org/0000-0002-9107-531X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mattia Segu","raw_affiliation_strings":["Google"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120247614","display_name":"Marta Tintore Gazulla","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marta Tintore Gazulla","raw_affiliation_strings":["Google"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012209802","display_name":"Yongqin Xian","orcid":"https://orcid.org/0000-0001-7186-1295"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yongqin Xian","raw_affiliation_strings":["Google"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001254143","display_name":"Luc Van Gool","orcid":"https://orcid.org/0000-0002-3445-5711"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luc Van Gool","raw_affiliation_strings":["Sofia University,INSAIT"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sofia University,INSAIT","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041092666","display_name":"Federico Tombari","orcid":"https://orcid.org/0000-0001-5598-5212"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Federico Tombari","raw_affiliation_strings":["Google"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28057341,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"20726","last_page":"20736"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9233999848365784,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9233999848365784,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.019700000062584877,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.008299999870359898,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6068000197410583},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5716000199317932},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5231999754905701},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.4196000099182129},{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.41449999809265137},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.41100001335144043},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.41019999980926514},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.40799999237060547},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.39959999918937683},{"id":"https://openalex.org/keywords/ranging","display_name":"Ranging","score":0.37709999084472656}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8227999806404114},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6068000197410583},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5716000199317932},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5231999754905701},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47679999470710754},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.45890000462532043},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.4196000099182129},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.41449999809265137},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.41100001335144043},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.41019999980926514},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.40799999237060547},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.39959999918937683},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.37709999084472656},{"id":"https://openalex.org/C134835016","wikidata":"https://www.wikidata.org/wiki/Q690265","display_name":"Lookup table","level":2,"score":0.366100013256073},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3573000133037567},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.34540000557899475},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3296000063419342},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.32910001277923584},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32839998602867126},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3224000036716461},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.31459999084472656},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.3124000132083893},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3100999891757965},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.2996000051498413},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C2776061582","wikidata":"https://www.wikidata.org/wiki/Q25325231","display_name":"Mobile edge computing","level":3,"score":0.2856999933719635},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2793999910354614},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.26980000734329224},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.26260000467300415},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.250900000333786}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.01927","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.01927","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2510.15026","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.15026","pdf_url":"https://arxiv.org/pdf/2510.15026","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2510.15026","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.15026","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.15026","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.15026","pdf_url":"https://arxiv.org/pdf/2510.15026","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scaling":[0],"up":[1,133],"model":[2],"size":[3],"and":[4,17,23,83,96,109,128,136,164],"training":[5,82,114,149],"data":[6],"has":[7],"advanced":[8],"foundation":[9,58],"models":[10,59],"for":[11,60,65,93,105,122,156],"instance-level":[12],"perception,":[13],"achieving":[14],"state-of-the-art":[15,141],"in-domain":[16],"zero-shot":[18],"performance":[19,142],"across":[20,71],"object":[21],"detection":[22],"segmentation.":[24],"However,":[25],"their":[26],"high":[27],"computational":[28],"cost":[29],"limits":[30],"adoption":[31],"on":[32,159],"resource-constrained":[33],"platforms.":[34],"We":[35,51],"first":[36],"examine":[37],"the":[38,148],"limitations":[39],"of":[40,57,147],"existing":[41],"architectures":[42],"in":[43,143],"enabling":[44],"efficient":[45,94,117,157],"edge":[46],"deployment":[47,70],"without":[48],"compromising":[49],"performance.":[50],"then":[52],"introduce":[53],"MOBIUS,":[54],"a":[55,89,100,111,145,153],"family":[56],"universal":[61],"instance":[62],"segmentation,":[63],"designed":[64],"Pareto-optimal":[66],"downscaling":[67],"to":[68,77,134],"support":[69],"devices":[72],"ranging":[73],"from":[74],"high-end":[75],"accelerators":[76],"mobile":[78,165],"hardware.":[79],"To":[80],"reduce":[81],"inference":[84],"demands,":[85],"we":[86],"propose:":[87],"(i)":[88],"bottleneck":[90],"pixel":[91,127],"decoder":[92,107,130],"multi-scale":[95],"multi-modal":[97],"fusion,":[98],"(ii)":[99],"language-guided":[101],"uncertainty":[102],"calibration":[103],"loss":[104],"adaptive":[106],"pruning,":[108],"(iii)":[110],"streamlined,":[112],"unified":[113],"strategy.":[115],"Unlike":[116],"baselines":[118],"that":[119],"trade":[120],"accuracy":[121],"reduced":[123],"complexity,":[124],"MOBIUS":[125,151],"reduces":[126],"transformer":[129],"FLOPs":[131],"by":[132],"55%":[135],"75%,":[137],"respectively,":[138],"while":[139],"maintaining":[140],"just":[144],"third":[146],"iterations.":[150],"establishes":[152],"new":[154],"benchmark":[155],"segmentation":[158],"both":[160],"high-performance":[161],"computing":[162],"platforms":[163],"devices.":[166]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-21T00:00:00"}
