{"id":"https://openalex.org/W4283827327","doi":"https://doi.org/10.1145/3503161.3547824","title":"Towards Robust Video Object Segmentation with Adaptive Object Calibration","display_name":"Towards Robust Video Object Segmentation with Adaptive Object Calibration","publication_year":2022,"publication_date":"2022-10-10","ids":{"openalex":"https://openalex.org/W4283827327","doi":"https://doi.org/10.1145/3503161.3547824"},"language":"en","primary_location":{"id":"doi:10.1145/3503161.3547824","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3547824","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018803784","display_name":"Xiaohao Xu","orcid":"https://orcid.org/0000-0002-6750-9704"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaohao Xu","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070492669","display_name":"Jinglu Wang","orcid":"https://orcid.org/0000-0002-3222-6579"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinglu Wang","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100724142","display_name":"Ming Xiang","orcid":"https://orcid.org/0000-0003-2284-3685"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Ming","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100756584","display_name":"Yan Lu","orcid":"https://orcid.org/0000-0001-5383-6424"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Lu","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5018803784"],"corresponding_institution_ids":["https://openalex.org/I47720641"],"apc_list":null,"apc_paid":null,"fwci":1.1994,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.85819193,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2709","last_page":"2718"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8281341791152954},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8115347623825073},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7765777707099915},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.7373048663139343},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7355091571807861},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6615172624588013},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.6324187517166138},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5374880433082581},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.5002536773681641},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4312293231487274},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3772923946380615}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8281341791152954},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8115347623825073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7765777707099915},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7373048663139343},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7355091571807861},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6615172624588013},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.6324187517166138},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5374880433082581},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.5002536773681641},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4312293231487274},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3772923946380615},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3503161.3547824","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3547824","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.6800000071525574}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W114517082","https://openalex.org/W2010491386","https://openalex.org/W2065791169","https://openalex.org/W2194775991","https://openalex.org/W2470139095","https://openalex.org/W2564998703","https://openalex.org/W2603777577","https://openalex.org/W2799157347","https://openalex.org/W2889658408","https://openalex.org/W2889986507","https://openalex.org/W2911981226","https://openalex.org/W2916797271","https://openalex.org/W2962974533","https://openalex.org/W2963253279","https://openalex.org/W2963445119","https://openalex.org/W2963548592","https://openalex.org/W2963732700","https://openalex.org/W2963814095","https://openalex.org/W2964157492","https://openalex.org/W2964309882","https://openalex.org/W2967622921","https://openalex.org/W2967767914","https://openalex.org/W2986050084","https://openalex.org/W2989035356","https://openalex.org/W2990138404","https://openalex.org/W2990205821","https://openalex.org/W3023341321","https://openalex.org/W3035502324","https://openalex.org/W3092718160","https://openalex.org/W3093466063","https://openalex.org/W3094664776","https://openalex.org/W3102457447","https://openalex.org/W3106297436","https://openalex.org/W3106988096","https://openalex.org/W3160550216","https://openalex.org/W3169367294","https://openalex.org/W3170630188","https://openalex.org/W3183673520","https://openalex.org/W3192871594","https://openalex.org/W3200949949","https://openalex.org/W3202961428","https://openalex.org/W3206855633","https://openalex.org/W4240153047","https://openalex.org/W6604233986"],"related_works":["https://openalex.org/W2965546495","https://openalex.org/W2153315159","https://openalex.org/W3103844505","https://openalex.org/W4205463238","https://openalex.org/W259157601","https://openalex.org/W2761785940","https://openalex.org/W2110523656","https://openalex.org/W1482209366","https://openalex.org/W2965594636","https://openalex.org/W2912550626"],"abstract_inverted_index":{"In":[0],"the":[1,12,62,76,82,119,132,148,165,181,196,206],"booming":[2],"video":[3,5,16],"era,":[4],"segmentation":[6,18],"attracts":[7],"increasing":[8],"research":[9],"attention":[10],"in":[11,24,176],"multimedia":[13],"community.":[14],"Semi-supervised":[15],"object":[17,33,89,106,110,120,126,166,182],"(VOS)":[19],"aims":[20],"at":[21,137],"segmenting":[22],"objects":[23],"all":[25],"target":[26,52],"frames":[27],"of":[28,35,86],"a":[29,98,177],"video,":[30],"given":[31],"annotated":[32],"masks":[34,111,143],"reference":[36],"frames.":[37],"Most":[38],"existing":[39,210],"methods":[40],"build":[41],"pixel-wise":[42,48],"reference-target":[43,149],"correlations":[44,150],"and":[45,67,84,93,108,184,201,213],"then":[46],"perform":[47],"tracking":[49,63],"to":[50,55,65,80,90,112,187],"obtain":[51],"masks.":[53],"Due":[54],"neglecting":[56],"object-level":[57],"cues,":[58],"pixel-level":[59],"approaches":[60],"make":[61],"vulnerable":[64],"perturbations,":[66],"even":[68],"indiscriminate":[69],"among":[70,209],"similar":[71],"objects.":[72],"Towards":[73],"robust":[74],"VOS,":[75],"key":[77],"insight":[78],"is":[79],"calibrate":[81,109],"representation":[83],"mask":[85,173],"each":[87],"specific":[88],"be":[91,188],"expressive":[92],"discriminative.":[94],"Accordingly,":[95],"we":[96,117],"propose":[97],"new":[99],"deep":[100],"network,":[101],"which":[102],"can":[103],"adaptively":[104],"construct":[105,118],"representations":[107,121,183],"achieve":[113],"stronger":[114],"robustness.":[115],"First,":[116],"by":[122],"applying":[123],"an":[124],"adaptive":[125],"proxy":[127,167],"(AOP)":[128],"aggregation":[129],"method,":[130],"where":[131,180],"proxies":[133],"represent":[134],"arbitrary-shaped":[135],"segments":[136],"multi-levels":[138],"for":[139],"reference.":[140],"Then,":[141],"prototype":[142],"are":[144,157,193],"initially":[145],"generated":[146],"from":[147],"based":[151],"on":[152,164,195],"AOP.":[153],"Afterwards,":[154],"such":[155],"proto-masks":[156,185],"further":[158],"calibrated":[159],"through":[160],"network":[161],"modulation,":[162],"conditioning":[163],"representations.":[168],"We":[169],"consolidate":[170],"this":[171],"conditional":[172],"calibration":[174],"process":[175],"progressive":[178],"manner,":[179],"evolve":[186],"discriminative":[189],"iteratively.":[190],"Extensive":[191],"experiments":[192],"conducted":[194],"standard":[197],"VOS":[198],"benchmarks,":[199],"YouTube-VOS-18/19":[200],"DAVIS-17.":[202],"Our":[203],"model":[204],"achieves":[205],"state-of-the-art":[207],"performance":[208],"published":[211],"works,":[212],"also":[214],"exhibits":[215],"superior":[216],"robustness":[217],"against":[218],"perturbations.":[219]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
