{"id":"https://openalex.org/W4225575592","doi":"https://doi.org/10.1109/tmm.2022.3159403","title":"Starting Point Selection and Multiple-Standard Matching for Video Object Segmentation With Language Annotation","display_name":"Starting Point Selection and Multiple-Standard Matching for Video Object Segmentation With Language Annotation","publication_year":2022,"publication_date":"2022-03-15","ids":{"openalex":"https://openalex.org/W4225575592","doi":"https://doi.org/10.1109/tmm.2022.3159403"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2022.3159403","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3159403","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017171970","display_name":"Mingjie Sun","orcid":"https://orcid.org/0000-0002-3697-7927"},"institutions":[{"id":"https://openalex.org/I146655781","display_name":"University of Liverpool","ror":"https://ror.org/04xs57h96","country_code":"GB","type":"education","lineage":["https://openalex.org/I146655781"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Mingjie Sun","raw_affiliation_strings":["University of Liverpool, Liverpool, U.K"],"affiliations":[{"raw_affiliation_string":"University of Liverpool, Liverpool, U.K","institution_ids":["https://openalex.org/I146655781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011918180","display_name":"Jimin Xiao","orcid":"https://orcid.org/0000-0002-9416-2486"},"institutions":[{"id":"https://openalex.org/I69356397","display_name":"Xi\u2019an Jiaotong-Liverpool University","ror":"https://ror.org/03zmrmn05","country_code":"CN","type":"education","lineage":["https://openalex.org/I69356397"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jimin Xiao","raw_affiliation_strings":["School of Advanced Technology, Xi&#x00E1;n Jiaotong-Liverpool University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Advanced Technology, Xi&#x00E1;n Jiaotong-Liverpool University, Suzhou, China","institution_ids":["https://openalex.org/I69356397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033755146","display_name":"Eng Gee Lim","orcid":"https://orcid.org/0000-0003-0199-7386"},"institutions":[{"id":"https://openalex.org/I69356397","display_name":"Xi\u2019an Jiaotong-Liverpool University","ror":"https://ror.org/03zmrmn05","country_code":"CN","type":"education","lineage":["https://openalex.org/I69356397"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Eng Gee Lim","raw_affiliation_strings":["School of Advanced Technology, Xi&#x00E1;n Jiaotong-Liverpool University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Advanced Technology, Xi&#x00E1;n Jiaotong-Liverpool University, Suzhou, China","institution_ids":["https://openalex.org/I69356397"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100362745","display_name":"Yao Zhao","orcid":"https://orcid.org/0000-0002-8581-9554"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Zhao","raw_affiliation_strings":["Institute of Information Science, Beijing Jiaotong University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5017171970"],"corresponding_institution_ids":["https://openalex.org/I146655781"],"apc_list":null,"apc_paid":null,"fwci":0.6037,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.65933656,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"25","issue":null,"first_page":"3354","last_page":"3363"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8871341347694397},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7554347515106201},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.6553952693939209},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6551895141601562},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6167462468147278},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5496835708618164},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5345363616943359},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5284053087234497},{"id":"https://openalex.org/keywords/optical-flow","display_name":"Optical flow","score":0.5183249711990356},{"id":"https://openalex.org/keywords/video-tracking","display_name":"Video tracking","score":0.5123780965805054},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5093086957931519},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.47136297821998596},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4530295431613922},{"id":"https://openalex.org/keywords/block-matching-algorithm","display_name":"Block-matching algorithm","score":0.42829856276512146},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.42533737421035767},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.41922253370285034},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.41796430945396423},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.41602978110313416},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.1374218761920929}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8871341347694397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7554347515106201},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.6553952693939209},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6551895141601562},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6167462468147278},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5496835708618164},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5345363616943359},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5284053087234497},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.5183249711990356},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.5123780965805054},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5093086957931519},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.47136297821998596},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4530295431613922},{"id":"https://openalex.org/C167510206","wikidata":"https://www.wikidata.org/wiki/Q2835824","display_name":"Block-matching algorithm","level":4,"score":0.42829856276512146},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.42533737421035767},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.41922253370285034},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.41796430945396423},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.41602978110313416},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.1374218761920929},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2022.3159403","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3159403","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1794522962","display_name":null,"funder_award_id":"61972323","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3910816902","display_name":null,"funder_award_id":"2018AAA0102100","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5655896502","display_name":null,"funder_award_id":"62120106009","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7536822468","display_name":null,"funder_award_id":"U1936212","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W7746136","https://openalex.org/W1536680647","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1973054923","https://openalex.org/W2037227137","https://openalex.org/W2108598243","https://openalex.org/W2117539524","https://openalex.org/W2138621090","https://openalex.org/W2138682569","https://openalex.org/W2194775991","https://openalex.org/W2247513039","https://openalex.org/W2470139095","https://openalex.org/W2489434015","https://openalex.org/W2610147486","https://openalex.org/W2630837129","https://openalex.org/W2747053578","https://openalex.org/W2792215676","https://openalex.org/W2916743882","https://openalex.org/W2916797271","https://openalex.org/W2921536280","https://openalex.org/W2962825871","https://openalex.org/W2962942822","https://openalex.org/W2963037989","https://openalex.org/W2963109634","https://openalex.org/W2963150697","https://openalex.org/W2963253279","https://openalex.org/W2963445828","https://openalex.org/W2963782415","https://openalex.org/W2964157492","https://openalex.org/W2964218467","https://openalex.org/W2964345792","https://openalex.org/W2984121207","https://openalex.org/W2987391422","https://openalex.org/W2987401211","https://openalex.org/W2987734933","https://openalex.org/W2990205821","https://openalex.org/W2993182889","https://openalex.org/W2995633856","https://openalex.org/W2997463005","https://openalex.org/W3003423830","https://openalex.org/W3010775719","https://openalex.org/W3023463084","https://openalex.org/W3027731085","https://openalex.org/W3035080658","https://openalex.org/W3098232790","https://openalex.org/W3099166112","https://openalex.org/W3109120382","https://openalex.org/W3110030584","https://openalex.org/W3117097536","https://openalex.org/W3126391825","https://openalex.org/W3166738350","https://openalex.org/W3177087374","https://openalex.org/W3182736162","https://openalex.org/W6739696289","https://openalex.org/W6759534164","https://openalex.org/W6784639621","https://openalex.org/W6784713722"],"related_works":["https://openalex.org/W2385949326","https://openalex.org/W2789220062","https://openalex.org/W1967061043","https://openalex.org/W2019566805","https://openalex.org/W1487175407","https://openalex.org/W2068395580","https://openalex.org/W2534746541","https://openalex.org/W2517104666","https://openalex.org/W3177406559","https://openalex.org/W2786306966"],"abstract_inverted_index":{"In":[0],"this":[1],"study,":[2],"we":[3],"investigate":[4],"language-level":[5],"video":[6,56,98],"object":[7,99],"segmentation,":[8],"where":[9],"first-frame":[10],"language":[11,22],"annotation":[12],"is":[13,24,65,93,122],"used":[14],"to":[15,43,68,85],"describe":[16,69],"the":[17,33,38,50,87,103,126],"target":[18],"object.":[19],"Because":[20],"a":[21,31,54,58,114],"label":[23],"typically":[25],"compatible":[26],"with":[27],"all":[28],"frames":[29],"in":[30],"video,":[32],"proposed":[34,67,91],"method":[35,92],"can":[36],"choose":[37],"most":[39],"suitable":[40],"starting":[41],"frame":[42],"mitigate":[44],"initialization":[45],"failure.":[46],"Apart":[47],"from":[48,53],"extracting":[49],"visual":[51],"feature":[52],"static":[55],"frame,":[57],"motion-language":[59],"score":[60],"based":[61],"on":[62,95,124],"optical":[63],"flow":[64],"also":[66],"moving":[70],"objects":[71],"more":[72],"accurately.":[73],"Scores":[74],"of":[75],"multiple":[76],"standards":[77],"are":[78],"then":[79],"aggregated":[80],"using":[81],"an":[82],"attention-based":[83],"mechanism":[84],"predict":[86],"final":[88],"result.":[89],"The":[90,135],"evaluated":[94],"four":[96],"widely-used":[97],"segmentation":[100],"datasets,":[101,112],"including":[102],"DAVIS":[104,106,127,131],"2017,":[105],"2016,":[107],"SegTrack":[108],"V2":[109],"and":[110,113,130],"YouTubeObject":[111],"novel":[115],"accuracy":[116],"measured":[117],"as":[118],"mean":[119],"region":[120],"similarity":[121],"obtained":[123],"both":[125],"2017":[128],"(67.2%)":[129],"2016":[132],"(83.5%)":[133],"datasets.":[134],"code":[136],"will":[137],"be":[138],"published.":[139]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
