{"id":"https://openalex.org/W4417124981","doi":"https://doi.org/10.1145/3757377.3763917","title":"OmnimatteZero: Fast Training-free Omnimatte with Pre-trained Video Diffusion Models","display_name":"OmnimatteZero: Fast Training-free Omnimatte with Pre-trained Video Diffusion Models","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W4417124981","doi":"https://doi.org/10.1145/3757377.3763917"},"language":null,"primary_location":{"id":"doi:10.1145/3757377.3763917","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763917","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3757377.3763917","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029218807","display_name":"Dvir Samuel","orcid":"https://orcid.org/0000-0003-3573-2220"},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Dvir Samuel","raw_affiliation_strings":["Bar-Ilan University, Ramat-Gan, Israel and OriginAI, Ramat-Gan, Israel"],"affiliations":[{"raw_affiliation_string":"Bar-Ilan University, Ramat-Gan, Israel and OriginAI, Ramat-Gan, Israel","institution_ids":["https://openalex.org/I13955877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114248281","display_name":"M.Y. Levy","orcid":"https://orcid.org/0009-0008-2915-1618"},"institutions":[{"id":"https://openalex.org/I197251160","display_name":"Hebrew University of Jerusalem","ror":"https://ror.org/03qxff017","country_code":"IL","type":"education","lineage":["https://openalex.org/I197251160"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Matan Levy","raw_affiliation_strings":["Hebrew University of Jerusalem, Jerusalem, Israel"],"affiliations":[{"raw_affiliation_string":"Hebrew University of Jerusalem, Jerusalem, Israel","institution_ids":["https://openalex.org/I197251160"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062970130","display_name":"Nir Darshan","orcid":"https://orcid.org/0009-0004-0652-9519"},"institutions":[{"id":"https://openalex.org/I2799530024","display_name":"ORT Israel","ror":"https://ror.org/010dms986","country_code":"IL","type":"nonprofit","lineage":["https://openalex.org/I2799530024"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Nir Darshan","raw_affiliation_strings":["OriginAI, Tel-Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"OriginAI, Tel-Aviv, Israel","institution_ids":["https://openalex.org/I2799530024"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045719865","display_name":"Gal Chechik","orcid":"https://orcid.org/0000-0001-9164-5303"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]},{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Gal Chechik","raw_affiliation_strings":["NVIDIA Research, Tel-Aviv, Israel and Bar-Ilan University, Tel-Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"NVIDIA Research, Tel-Aviv, Israel and Bar-Ilan University, Tel-Aviv, Israel","institution_ids":["https://openalex.org/I16391192","https://openalex.org/I13955877"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000712908","display_name":"Rami Ben\u2010Ari","orcid":"https://orcid.org/0000-0001-7162-7351"},"institutions":[{"id":"https://openalex.org/I2799530024","display_name":"ORT Israel","ror":"https://ror.org/010dms986","country_code":"IL","type":"nonprofit","lineage":["https://openalex.org/I2799530024"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Rami Ben-Ari","raw_affiliation_strings":["OriginAI, Tel-Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"OriginAI, Tel-Aviv, Israel","institution_ids":["https://openalex.org/I2799530024"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5029218807"],"corresponding_institution_ids":["https://openalex.org/I13955877"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.42810123,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9391000270843506,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9391000270843506,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.013000000268220901,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.006599999964237213,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6809999942779541},{"id":"https://openalex.org/keywords/inpainting","display_name":"Inpainting","score":0.6504999995231628},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6035000085830688},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5519999861717224},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4893999993801117},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.482699990272522},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.42419999837875366},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.3476000130176544}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7739999890327454},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7447999715805054},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7098000049591064},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6809999942779541},{"id":"https://openalex.org/C11727466","wikidata":"https://www.wikidata.org/wiki/Q1628157","display_name":"Inpainting","level":3,"score":0.6504999995231628},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6035000085830688},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5519999861717224},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4893999993801117},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.482699990272522},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.42419999837875366},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3476000130176544},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.3407000005245209},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.33709999918937683},{"id":"https://openalex.org/C106030495","wikidata":"https://www.wikidata.org/wiki/Q1797012","display_name":"Video compression picture types","level":4,"score":0.3003000020980835},{"id":"https://openalex.org/C141379421","wikidata":"https://www.wikidata.org/wiki/Q6094427","display_name":"Iterative reconstruction","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.296099990606308},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.29260000586509705},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.28220000863075256},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.2612999975681305},{"id":"https://openalex.org/C20894473","wikidata":"https://www.wikidata.org/wiki/Q1116105","display_name":"Object model","level":3,"score":0.2567000091075897},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C2780310081","wikidata":"https://www.wikidata.org/wiki/Q1154312","display_name":"Video editing","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C172849965","wikidata":"https://www.wikidata.org/wiki/Q3148875","display_name":"Reference frame","level":3,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3757377.3763917","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763917","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3757377.3763917","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763917","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2105038642","https://openalex.org/W2133059825","https://openalex.org/W2142912032","https://openalex.org/W2147823818","https://openalex.org/W2244837655","https://openalex.org/W2886714066","https://openalex.org/W2988407809","https://openalex.org/W3097348442","https://openalex.org/W3104310079","https://openalex.org/W3190025040","https://openalex.org/W4206046513","https://openalex.org/W4312497550","https://openalex.org/W4312598744","https://openalex.org/W4312912442","https://openalex.org/W4312933868","https://openalex.org/W4386113261","https://openalex.org/W4390871749","https://openalex.org/W4390872458","https://openalex.org/W4390872572","https://openalex.org/W4394625844","https://openalex.org/W4403843905","https://openalex.org/W4404965692","https://openalex.org/W4408345917","https://openalex.org/W4412587860"],"related_works":[],"abstract_inverted_index":{"In":[0,39],"Omnimatte,":[1],"one":[2],"aims":[3],"to":[4,95,141,167],"decompose":[5],"a":[6,45,91,147,188],"given":[7],"video":[8,52,88,165],"into":[9],"semantically":[10],"meaningful":[11],"layers,":[12],"including":[13],"the":[14,112,133,143,192],"background":[15,122,183],"and":[16,27,71,105,119,135,138,160],"individual":[17,64],"objects":[18,60,74],"along":[19,67],"with":[20,68,163,199],"their":[21,69],"associated":[22],"effects,":[23,70,145],"such":[24],"as":[25],"shadows":[26],"reflections.":[28],"Existing":[29],"methods":[30],"often":[31],"require":[32],"extensive":[33],"training":[34],"or":[35],"costly":[36],"self-supervised":[37],"optimization.":[38],"this":[40],"paper,":[41],"we":[42,102],"present":[43],"OmnimatteZero,":[44],"training-free":[46],"approach":[47],"that":[48,110,127,173],"leverages":[49],"off-the-shelf":[50],"pre-trained":[51],"diffusion":[53,113],"models":[54],"for":[55,87,115,191],"omnimatte.":[56],"It":[57],"can":[58,157],"remove":[59],"from":[61],"videos,":[62],"extract":[63],"object":[65,89,117,134,155],"layers":[66,156,166],"composite":[72],"those":[73],"onto":[75],"new":[76,164,169,189],"videos.":[77,170],"These":[78],"are":[79],"accomplished":[80],"by":[81],"adapting":[82],"zero-shot":[83],"image":[84],"inpainting":[85],"techniques":[86],"removal,":[90],"task":[92],"they":[93],"fail":[94],"handle":[96],"effectively":[97],"out-of-the-box.":[98],"To":[99],"overcome":[100],"this,":[101],"introduce":[103],"temporal":[104],"spatial":[106],"attention":[107],"guidance":[108],"modules":[109],"steer":[111],"process":[114],"accurate":[116],"removal":[118],"temporally":[120],"consistent":[121],"reconstruction.":[123],"We":[124],"further":[125],"show":[126,172],"self-attention":[128],"maps":[129],"capture":[130],"information":[131],"about":[132],"its":[136],"footprints":[137],"use":[139],"them":[140],"inpaint":[142],"object\u2019s":[144],"leaving":[146],"clean":[148],"background.":[149],"Additionally,":[150],"through":[151],"simple":[152],"latent":[153],"arithmetic,":[154],"be":[158],"isolated":[159],"recombined":[161],"seamlessly":[162],"produce":[168],"Evaluations":[171],"OmnimatteZero":[174],"not":[175],"only":[176],"achieves":[177],"superior":[178],"performance":[179,198],"in":[180],"terms":[181],"of":[182],"reconstruction":[184],"but":[185],"also":[186],"sets":[187],"record":[190],"fastest":[193],"Omnimatte":[194],"approach,":[195],"achieving":[196],"real-time":[197],"minimal":[200],"frame":[201],"runtime.":[202],"Project":[203],"Page.":[204]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-12-08T00:00:00"}
