{"id":"https://openalex.org/W4402773761","doi":"https://doi.org/10.1109/cvpr52733.2024.00335","title":"Open-Vocabulary Segmentation with Semantic-Assisted Calibration","display_name":"Open-Vocabulary Segmentation with Semantic-Assisted Calibration","publication_year":2024,"publication_date":"2024-06-16","ids":{"openalex":"https://openalex.org/W4402773761","doi":"https://doi.org/10.1109/cvpr52733.2024.00335"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52733.2024.00335","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52733.2024.00335","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101685090","display_name":"Yong Liu","orcid":"https://orcid.org/0009-0000-3078-1598"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]},{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yong Liu","raw_affiliation_strings":["Shenzhen International Graduate School, Tsinghua University,Shenzhen Key Laboratory of Ubiquitous Data Enabling,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University,Shenzhen Key Laboratory of Ubiquitous Data Enabling,China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113188053","display_name":"Sule Bai","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sule Bai","raw_affiliation_strings":["Shenzhen International Graduate School, Tsinghua University,Shenzhen Key Laboratory of Ubiquitous Data Enabling,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University,Shenzhen Key Laboratory of Ubiquitous Data Enabling,China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101691639","display_name":"Guanbin Li","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanbin Li","raw_affiliation_strings":["Sun Yat-sen University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100650009","display_name":"Yitong Wang","orcid":"https://orcid.org/0000-0002-7559-4152"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yitong Wang","raw_affiliation_strings":["ByteDance Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ByteDance Inc","institution_ids":[]}]},{"author_position":"last","author":{"id":null,"display_name":"Yansong Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I3131625388","display_name":"University Town of Shenzhen","ror":"https://ror.org/05f5j6225","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131625388"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yansong Tang","raw_affiliation_strings":["Shenzhen International Graduate School, Tsinghua University,Shenzhen Key Laboratory of Ubiquitous Data Enabling,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shenzhen International Graduate School, Tsinghua University,Shenzhen Key Laboratory of Ubiquitous Data Enabling,China","institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101685090"],"corresponding_institution_ids":["https://openalex.org/I3131625388","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":9.6575,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.98417828,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3491","last_page":"3500"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.972100019454956,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.972100019454956,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7728954553604126},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6490949988365173},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.599287748336792},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5903202891349792},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5460484027862549},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.4972555935382843},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.41523459553718567},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.37207284569740295},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.13210979104042053},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07423904538154602},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07360085844993591}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7728954553604126},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6490949988365173},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.599287748336792},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5903202891349792},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5460484027862549},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.4972555935382843},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.41523459553718567},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.37207284569740295},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.13210979104042053},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07423904538154602},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07360085844993591},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52733.2024.00335","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52733.2024.00335","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6299999952316284,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G800545075","display_name":null,"funder_award_id":"62206153,62322608","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2349784553","https://openalex.org/W3022596247","https://openalex.org/W2601444686","https://openalex.org/W4307058054","https://openalex.org/W4292238148","https://openalex.org/W4323660495","https://openalex.org/W2385319785","https://openalex.org/W2900827440","https://openalex.org/W3167549738","https://openalex.org/W1522196789"],"abstract_inverted_index":{"This":[0],"paper":[1],"studies":[2],"open-vocabulary":[3,23,149],"segmentation":[4,150],"(OVS)":[5],"through":[6],"calibrating":[7],"in-vocabulary":[8,87],"and":[9,56,89,135,169],"domain-biased":[10,90],"embedding":[11,88,114],"space":[12],"with":[13,29,60,79],"generalized":[14,107],"contextual":[15,123],"prior":[16,109],"of":[17,22,26,32,39,69,110,132,159],"CLIP.":[18],"As":[19],"the":[20,30,37,86,130,157],"core":[21],"understanding,":[24],"alignment":[25],"visual":[27],"content":[28],"semantics":[31],"unbounded":[33],"text":[34],"has":[35],"become":[36],"bottleneck":[38],"this":[40,44,84,94],"field.":[41],"To":[42,93],"address":[43],"challenge,":[45],"recent":[46],"works":[47],"propose":[48,170],"to":[49,85,115,128],"utilize":[50],"CLIP":[51,61,91,111],"as":[52],"an":[53],"additional":[54],"classifier":[55],"aggregate":[57],"model":[58],"predictions":[59],"classification":[62],"results.":[63],"Despite":[64],"their":[65],"remarkable":[66],"progress,":[67],"performance":[68,145],"OVS":[70],"methods":[71],"in":[72],"relevant":[73],"scenarios":[74],"is":[75,126,179],"still":[76],"unsatisfactory":[77],"compared":[78],"supervised":[80],"counterparts.":[81],"We":[82],"attribute":[83],"prediction.":[92],"end,":[95],"we":[96,105,153],"present":[97],"a":[98,122,171],"Semantic-assisted":[99],"CAlibration":[100],"Network":[101],"(SCAN).":[102],"In":[103],"SCAN,":[104],"incor-porate":[106],"semantic":[108,165],"into":[112],"proposal":[113],"avoid":[116],"collapsing":[117],"on":[118,146,156],"known":[119],"categories.":[120],"Besides,":[121],"shift":[124],"strategy":[125],"applied":[127],"mitigate":[129],"lack":[131],"global":[133],"context":[134],"unnatural":[136],"background":[137],"noise.":[138],"With":[139],"above":[140],"designs,":[141],"SCAN":[142],"achieves":[143],"state-of-the-art":[144],"all":[147],"popular":[148],"benchmarks.":[151],"Furthermore,":[152],"also":[154],"focus":[155],"problem":[158],"existing":[160],"evaluation":[161],"system":[162],"that":[163],"ignores":[164],"duplication":[166],"across":[167],"categories,":[168],"new":[172],"metric":[173],"called":[174],"Semantic-Guided":[175],"IoU":[176],"(SG-IoU).":[177],"Code":[178],"available":[180],"here.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":20},{"year":2024,"cited_by_count":4}],"updated_date":"2026-05-30T09:04:40.226872","created_date":"2025-10-10T00:00:00"}
