{"id":"https://openalex.org/W4320518449","doi":"https://doi.org/10.48550/arxiv.2208.10547","title":"InstanceFormer: An Online Video Instance Segmentation Framework","display_name":"InstanceFormer: An Online Video Instance Segmentation Framework","publication_year":2022,"publication_date":"2022-08-22","ids":{"openalex":"https://openalex.org/W4320518449","doi":"https://doi.org/10.48550/arxiv.2208.10547"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2208.10547","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.10547","pdf_url":"https://arxiv.org/pdf/2208.10547","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2208.10547","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047843370","display_name":"Rajat Koner","orcid":"https://orcid.org/0000-0003-3441-8192"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Koner, Rajat","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002790182","display_name":"Tanveer Hannan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hannan, Tanveer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018632773","display_name":"Suprosanna Shit","orcid":"https://orcid.org/0000-0003-4435-7207"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shit, Suprosanna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039442998","display_name":"Sahand Sharifzadeh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sharifzadeh, Sahand","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003177894","display_name":"Matthias Schubert","orcid":"https://orcid.org/0000-0002-6566-6343"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schubert, Matthias","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003335849","display_name":"Thomas Seidl","orcid":"https://orcid.org/0000-0002-4861-1412"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seidl, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5074808403","display_name":"Volker Tresp","orcid":"https://orcid.org/0000-0001-9428-3686"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tresp, Volker","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5047843370"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8693675398826599},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6605218648910522},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5631400942802429},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5428062677383423},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.47255560755729675},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.45961612462997437},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4414524435997009},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4264846444129944},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33564722537994385}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8693675398826599},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6605218648910522},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5631400942802429},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5428062677383423},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.47255560755729675},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.45961612462997437},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4414524435997009},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4264846444129944},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33564722537994385},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2208.10547","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.10547","pdf_url":"https://arxiv.org/pdf/2208.10547","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2208.10547","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2208.10547","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2208.10547","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2208.10547","pdf_url":"https://arxiv.org/pdf/2208.10547","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/12","score":0.4699999988079071,"display_name":"Responsible consumption and production"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W972276598","https://openalex.org/W2087343574","https://openalex.org/W4246352526","https://openalex.org/W2121910908"],"abstract_inverted_index":{"Recent":[0],"transformer-based":[1,50],"offline":[2,175],"video":[3,22],"instance":[4,135],"segmentation":[5],"(VIS)":[6],"approaches":[7,176],"achieve":[8],"encouraging":[9],"results":[10],"and":[11,23,63,74,77,86,141,179,185],"significantly":[12],"outperform":[13],"online":[14,52,161],"approaches.":[15],"However,":[16],"their":[17],"reliance":[18],"on":[19],"the":[20,24,83,104,108,131],"whole":[21],"immense":[25],"computational":[26],"complexity":[27],"caused":[28],"by":[29,164],"full":[30],"Spatio-temporal":[31],"attention":[32,140],"limit":[33],"them":[34],"in":[35,103,130],"real-life":[36],"applications":[37],"such":[38,182],"as":[39,183],"processing":[40],"lengthy":[41],"videos.":[42,65],"In":[43],"this":[44],"paper,":[45],"we":[46,81,97,121],"propose":[47,67,98],"a":[48,99,116,123,165],"single-stage":[49],"efficient":[51],"VIS":[53],"framework":[54],"named":[55],"InstanceFormer,":[56],"which":[57,106],"is":[58,188],"especially":[59],"suitable":[60],"for":[61,177],"long":[62,180],"challenging":[64,152,178],"We":[66],"three":[68],"novel":[69,100],"components":[70],"to":[71,92,110,127,147],"model":[72,93],"short-term":[73,94],"long-term":[75],"dependency":[76,149],"temporal":[78,118,124,142],"coherence.":[79],"First,":[80],"propagate":[82],"representation,":[84],"location,":[85],"semantic":[87],"information":[88],"of":[89,133],"prior":[90],"instances":[91,114],"changes.":[95],"Second,":[96],"memory":[101],"cross-attention":[102],"decoder,":[105],"allows":[107],"network":[109],"look":[111],"into":[112],"earlier":[113],"within":[115],"certain":[117],"window.":[119],"Finally,":[120],"employ":[122],"contrastive":[125],"loss":[126],"impose":[128],"coherence":[129,143],"representation":[132],"an":[134],"across":[136,168],"all":[137],"frames.":[138],"Memory":[139],"are":[144],"particularly":[145],"beneficial":[146],"long-range":[148],"modeling,":[150],"including":[151],"scenarios":[153],"like":[154],"occlusion.":[155],"The":[156],"proposed":[157],"InstanceFormer":[158,173],"outperforms":[159],"previous":[160],"benchmark":[162],"methods":[163],"large":[166],"margin":[167],"multiple":[169],"datasets.":[170],"Most":[171],"importantly,":[172],"surpasses":[174],"datasets":[181],"YouTube-VIS-2021":[184],"OVIS.":[186],"Code":[187],"available":[189],"at":[190],"https://github.com/rajatkoner08/InstanceFormer.":[191]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
