{"id":"https://openalex.org/W7138402042","doi":"https://doi.org/10.1609/aaai.v40i14.38215","title":"InfoCLIP: Bridging Vision-Language Pretraining and Open-Vocabulary Semantic Segmentation via Information-Theoretic Alignment Transfer","display_name":"InfoCLIP: Bridging Vision-Language Pretraining and Open-Vocabulary Semantic Segmentation via Information-Theoretic Alignment Transfer","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138402042","doi":"https://doi.org/10.1609/aaai.v40i14.38215"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i14.38215","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38215","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38215/42177","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38215/42177","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125715122","display_name":"Muyao Yuan","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Muyao Yuan","raw_affiliation_strings":["School of Computer Science and Technology, Xi\u2019an Jiaotong University\nMinistry of Education Key Laboratory of Intelligent Networks and Network Security, Xi\u2019an Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi\u2019an Jiaotong University\nMinistry of Education Key Laboratory of Intelligent Networks and Network Security, Xi\u2019an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030327487","display_name":"Yuanhong Zhang","orcid":"https://orcid.org/0009-0006-0281-2987"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanhong Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Xi\u2019an Jiaotong University\nShaanxi Province Key Laboratory of Big Data Knowledge Engineering, Xi\u2019an Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi\u2019an Jiaotong University\nShaanxi Province Key Laboratory of Big Data Knowledge Engineering, Xi\u2019an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129740391","display_name":"Weizhan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weizhan Zhang","raw_affiliation_strings":["School of Computer Science and Technology, Xi\u2019an Jiaotong University\nMinistry of Education Key Laboratory of Intelligent Networks and Network Security, Xi\u2019an Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi\u2019an Jiaotong University\nMinistry of Education Key Laboratory of Intelligent Networks and Network Security, Xi\u2019an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129718256","display_name":"Lan Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]},{"id":"https://openalex.org/I4387153335","display_name":"China Telecom","ror":"https://ror.org/05p67dv18","country_code":null,"type":"company","lineage":["https://openalex.org/I4387153335"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lan Ma","raw_affiliation_strings":["China Telecom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"China Telecom","institution_ids":["https://openalex.org/I4210136246","https://openalex.org/I4387153335"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129649805","display_name":"Yuan Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136246","display_name":"China Telecom (China)","ror":"https://ror.org/03jgnzt20","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210136246"]},{"id":"https://openalex.org/I4387153335","display_name":"China Telecom","ror":"https://ror.org/05p67dv18","country_code":null,"type":"company","lineage":["https://openalex.org/I4387153335"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Gao","raw_affiliation_strings":["China Telecom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"China Telecom","institution_ids":["https://openalex.org/I4210136246","https://openalex.org/I4387153335"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040143380","display_name":"Jiangyong Ying","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127487","display_name":"Vision Technology (United States)","ror":"https://ror.org/03gmxkp43","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127487"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiangyong Ying","raw_affiliation_strings":["China Telecom E-surfing Vision Technology Co., Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"China Telecom E-surfing Vision Technology Co., Ltd","institution_ids":["https://openalex.org/I4210127487"]}]},{"author_position":"last","author":{"id":null,"display_name":"Yudeng Xin","orcid":null},"institutions":[{"id":"https://openalex.org/I1323252656","display_name":"Information Technology University","ror":"https://ror.org/00ngv8j44","country_code":"PK","type":"education","lineage":["https://openalex.org/I1323252656"]},{"id":"https://openalex.org/I165779595","display_name":"The University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU","PK"],"is_corresponding":false,"raw_author_name":"Yudeng Xin","raw_affiliation_strings":["Faculty of Engineering and Information Technology, University of Melbourne"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Engineering and Information Technology, University of Melbourne","institution_ids":["https://openalex.org/I1323252656","https://openalex.org/I165779595"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5125715122"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.70457797,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"14","first_page":"12240","last_page":"12248"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7494999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7494999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.05469999834895134,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.04809999838471413,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7010999917984009},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.6665999889373779},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.5637999773025513},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.48190000653266907},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4788999855518341},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.46549999713897705},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4526999890804291},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.42320001125335693}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7732999920845032},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7037000060081482},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7010999917984009},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.6665999889373779},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.5637999773025513},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.48190000653266907},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4788999855518341},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.46549999713897705},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4526999890804291},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4293999969959259},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.42320001125335693},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.38940000534057617},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37400001287460327},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3617999851703644},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.35740000009536743},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.30399999022483826},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.301800012588501},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.30169999599456787},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.29820001125335693},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.29660001397132874},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2915000021457672},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.26660001277923584},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i14.38215","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38215","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38215/42177","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i14.38215","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38215","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38215/42177","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4551721513271332,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138402042.pdf","grobid_xml":"https://content.openalex.org/works/W7138402042.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recently,":[0],"the":[1,36,62,81,105,109,116,127,136],"strong":[2],"generalization":[3],"ability":[4],"of":[5,112,138],"CLIP":[6,23,60,87,114,142],"has":[7],"facilitated":[8],"open-vocabulary":[9,145],"semantic":[10,96,123,146],"segmentation,":[11,147],"which":[12,49],"labels":[13],"pixels":[14],"using":[15],"arbitrary":[16],"text.":[17],"However,":[18],"existing":[19],"methods":[20],"that":[21],"fine-tune":[22],"for":[24,126,144],"segmentation":[25,63,128],"on":[26],"limited":[27],"seen":[28],"categories":[29],"often":[30],"lead":[31],"to":[32,54,61,88,119],"overfitting":[33],"and":[34,115,151],"degrade":[35],"pretrained":[37,59,86,113],"vision-language":[38],"alignment.":[39],"To":[40],"stabilize":[41],"modality":[42,83],"alignment":[43,56,84,110],"during":[44],"fine-tuning,":[45],"we":[46,79,103],"propose":[47],"InfoCLIP,":[48],"leverages":[50],"an":[51],"information-theoretic":[52],"perspective":[53],"transfer":[55,67,120],"knowledge":[57,111],"from":[58,85,92],"task.":[64,129],"Specifically,":[65],"this":[66],"is":[68],"guided":[69],"by":[70],"two":[71],"novel":[72],"objectives":[73],"grounded":[74],"in":[75,140,153],"mutual":[76,106],"information.":[77],"First,":[78],"compress":[80],"pixel-text":[82],"reduce":[89],"noise":[90],"arising":[91],"its":[93,149],"coarse-grained":[94],"local":[95,122],"representations":[97],"learned":[98],"under":[99],"image-text":[100],"supervision.":[101],"Second,":[102],"maximize":[104],"information":[107],"between":[108],"fine-tuned":[117],"model":[118],"compact":[121],"relations":[124],"suited":[125],"Extensive":[130],"evaluations":[131],"across":[132],"various":[133],"benchmarks":[134],"validate":[135],"effectiveness":[137],"InfoCLIP":[139],"enhancing":[141],"fine-tuning":[143],"demonstrating":[148],"adaptability":[150],"superiority":[152],"asymmetric":[154],"transfer.":[155]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2026-03-18T00:00:00"}
