{"id":"https://openalex.org/W4401990305","doi":"https://doi.org/10.1109/tcsvt.2024.3451981","title":"MoBox: Enhancing Video Object Segmentation With Motion-Augmented Box Supervision","display_name":"MoBox: Enhancing Video Object Segmentation With Motion-Augmented Box Supervision","publication_year":2024,"publication_date":"2024-08-29","ids":{"openalex":"https://openalex.org/W4401990305","doi":"https://doi.org/10.1109/tcsvt.2024.3451981"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3451981","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3451981","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101635377","display_name":"Xiaomin Li","orcid":"https://orcid.org/0000-0001-7202-6865"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaomin Li","raw_affiliation_strings":["School of Artificial Intelligence, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020704860","display_name":"Qinghe Wang","orcid":"https://orcid.org/0000-0001-6908-5485"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinghe Wang","raw_affiliation_strings":["School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079789888","display_name":"Dezhuang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dezhuang Li","raw_affiliation_strings":["School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101903528","display_name":"Mengmeng Ge","orcid":"https://orcid.org/0009-0003-1301-1323"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengmeng Ge","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067168484","display_name":"Xu Jia","orcid":"https://orcid.org/0000-0003-3168-3505"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Jia","raw_affiliation_strings":["School of Artificial Intelligence, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101509529","display_name":"You He","orcid":"https://orcid.org/0000-0002-6111-340X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"You He","raw_affiliation_strings":["Department of Electronic Engineering, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006986293","display_name":"Huchuan Lu","orcid":"https://orcid.org/0000-0002-6668-9758"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huchuan Lu","raw_affiliation_strings":["School of Information and Communication Engineering, Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101635377"],"corresponding_institution_ids":["https://openalex.org/I27357992"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13175791,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"35","issue":"1","first_page":"405","last_page":"417"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.7535736560821533},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7180359363555908},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6736826300621033},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6589406132698059},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.5926882028579712},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5451341271400452},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4504566490650177},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.4121731221675873}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7535736560821533},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7180359363555908},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6736826300621033},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6589406132698059},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.5926882028579712},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5451341271400452},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4504566490650177},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.4121731221675873}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3451981","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3451981","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":81,"referenced_works":["https://openalex.org/W764651262","https://openalex.org/W1495267108","https://openalex.org/W1513100184","https://openalex.org/W2124351162","https://openalex.org/W2168804568","https://openalex.org/W2221898772","https://openalex.org/W2259424905","https://openalex.org/W2560474170","https://openalex.org/W2798441772","https://openalex.org/W2799157347","https://openalex.org/W2901412525","https://openalex.org/W2956648669","https://openalex.org/W2962766617","https://openalex.org/W2963253279","https://openalex.org/W2963426332","https://openalex.org/W2963732700","https://openalex.org/W2963782415","https://openalex.org/W2963983744","https://openalex.org/W2964218467","https://openalex.org/W2964700958","https://openalex.org/W2990205821","https://openalex.org/W3005203926","https://openalex.org/W3034798428","https://openalex.org/W3034915791","https://openalex.org/W3045477169","https://openalex.org/W3082755608","https://openalex.org/W3094664776","https://openalex.org/W3106546328","https://openalex.org/W3106773277","https://openalex.org/W3107695429","https://openalex.org/W3108819577","https://openalex.org/W3109908659","https://openalex.org/W3110030584","https://openalex.org/W3110389231","https://openalex.org/W3129684859","https://openalex.org/W3132520841","https://openalex.org/W3164543136","https://openalex.org/W3170630188","https://openalex.org/W3175132347","https://openalex.org/W3176692018","https://openalex.org/W3180169285","https://openalex.org/W3182200686","https://openalex.org/W3183673520","https://openalex.org/W3188582951","https://openalex.org/W3192692200","https://openalex.org/W3197828681","https://openalex.org/W3203318343","https://openalex.org/W3203943263","https://openalex.org/W3212940248","https://openalex.org/W3215023725","https://openalex.org/W4205474609","https://openalex.org/W4283796148","https://openalex.org/W4285165925","https://openalex.org/W4293464712","https://openalex.org/W4294311190","https://openalex.org/W4312396403","https://openalex.org/W4312578467","https://openalex.org/W4386076033","https://openalex.org/W4387448157","https://openalex.org/W4389816315","https://openalex.org/W4390873252","https://openalex.org/W4390873799","https://openalex.org/W4390874575","https://openalex.org/W4395097100","https://openalex.org/W4396903426","https://openalex.org/W6733814495","https://openalex.org/W6754033419","https://openalex.org/W6756984415","https://openalex.org/W6757817989","https://openalex.org/W6759534164","https://openalex.org/W6762196805","https://openalex.org/W6767026726","https://openalex.org/W6767465851","https://openalex.org/W6784713722","https://openalex.org/W6796505553","https://openalex.org/W6796524941","https://openalex.org/W6798046461","https://openalex.org/W6846581650","https://openalex.org/W6851543690","https://openalex.org/W6852040150","https://openalex.org/W6852404605"],"related_works":["https://openalex.org/W4379231730","https://openalex.org/W4389858081","https://openalex.org/W2737719445","https://openalex.org/W4239098401","https://openalex.org/W2501551404","https://openalex.org/W4385583601","https://openalex.org/W4298131179","https://openalex.org/W2113201962","https://openalex.org/W4205448459","https://openalex.org/W1522196789"],"abstract_inverted_index":{"We":[0],"propose":[1,116],"MoBox,":[2,193],"a":[3,23,37,45,117,156],"low-cost":[4],"solution":[5],"for":[6,19],"semi-supervised":[7,25,197],"video":[8,26,198],"object":[9,27,199],"segmentation":[10,28,200],"that":[11,49,195],"requires":[12],"only":[13,207],"bounding":[14],"boxes":[15],"as":[16,112],"manual":[17],"annotations":[18],"training.":[20],"Built":[21],"upon":[22],"mature":[24],"network,":[29],"we":[30,43,102,133],"redesign":[31],"the":[32,57,61,65,76,83,89,99,108,128,136,170,182,190,196],"training":[33,40],"losses":[34],"and":[35,64,78,98,115,175,211],"employ":[36],"more":[38],"stringent":[39],"strategy.":[41],"Specifically,":[42],"introduce":[44,135],"well-designed":[46],"constraint":[47,139],"term":[48],"enhances":[50],"traditional":[51],"spatial":[52],"projection":[53],"by":[54,95],"simultaneously":[55],"leveraging":[56],"projections":[58],"of":[59,86,130,167,179,192,214],"both":[60],"ground-truth":[62],"box":[63,209],"predicted":[66],"mask":[67],"across":[68],"two":[69],"axes,":[70],"rather":[71],"than":[72],"evaluating":[73],"discrepancies":[74],"along":[75],"x-axis":[77],"y-axis":[79],"independently.":[80],"To":[81],"harness":[82],"intrinsic":[84,212],"properties":[85],"videos,":[87],"considering":[88],"underlying":[90],"correspondence":[91],"between":[92],"motion":[93,104,118],"represented":[94],"optical":[96],"flow":[97],"original":[100],"image,":[101],"incorporate":[103],"coherence":[105],"information":[106,114,213],"into":[107],"color":[109],"consistency":[110],"loss":[111,120],"supplementary":[113],"discrepancy":[119],"to":[121,126],"obtain":[122],"accurate":[123],"boundaries.":[124],"Additionally,":[125],"mitigate":[127],"ambiguity":[129],"weak":[131],"supervision,":[132],"further":[134],"pseudo":[137],"strict":[138],"during":[140],"training,":[141],"which":[142],"significantly":[143],"improves":[144],"model":[145,201],"performance.":[146],"Our":[147],"approach":[148],"yields":[149],"competitive":[150],"scores":[151],"on":[152,169,181],"popular":[153],"benchmarks,":[154],"achieving":[155],"<inline-formula":[157],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[158],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[159],"<tex-math":[160],"notation=\"LaTeX\">$\\mathcal":[161],"{J}\\&amp;":[162],"\\mathcal":[163],"{F}$":[164],"</tex-math></inline-formula>":[165],"score":[166,178],"78.6":[168],"DAVIS":[171],"2017":[172],"validation":[173,185],"set":[174],"an":[176],"Overall":[177],"78.0":[180],"YouTube-VOS":[183],"2018":[184],"set.":[186],"These":[187],"results":[188],"highlight":[189],"efficacy":[191],"demonstrating":[194],"can":[202],"be":[203],"effectively":[204],"trained":[205],"using":[206],"motion-augmented":[208],"supervision":[210],"videos.":[215]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
