{"id":"https://openalex.org/W4415707710","doi":"https://doi.org/10.1109/icme59968.2025.11209220","title":"UniBind: Leveraging LLM-Augmented Knowledge Base for Scene Integration","display_name":"UniBind: Leveraging LLM-Augmented Knowledge Base for Scene Integration","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415707710","doi":"https://doi.org/10.1109/icme59968.2025.11209220"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209220","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100782372","display_name":"Zhonghao Zhang","orcid":"https://orcid.org/0000-0002-4354-4636"},"institutions":[{"id":"https://openalex.org/I21642278","display_name":"Ningxia University","ror":"https://ror.org/04j7b2v61","country_code":"CN","type":"education","lineage":["https://openalex.org/I21642278"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhonghao Zhang","raw_affiliation_strings":["School of Information Engineering, Ningxia University,Yinchuan,China"],"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Ningxia University,Yinchuan,China","institution_ids":["https://openalex.org/I21642278"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100666490","display_name":"Ruonan Zhang","orcid":"https://orcid.org/0000-0003-0030-6758"},"institutions":[{"id":"https://openalex.org/I21642278","display_name":"Ningxia University","ror":"https://ror.org/04j7b2v61","country_code":"CN","type":"education","lineage":["https://openalex.org/I21642278"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruonan Zhang","raw_affiliation_strings":["School of Advanced Interdisciplinary Studies, Ningxia University,Zhongwei,China"],"affiliations":[{"raw_affiliation_string":"School of Advanced Interdisciplinary Studies, Ningxia University,Zhongwei,China","institution_ids":["https://openalex.org/I21642278"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032269255","display_name":"Libo Liu","orcid":"https://orcid.org/0000-0001-5773-0184"},"institutions":[{"id":"https://openalex.org/I21642278","display_name":"Ningxia University","ror":"https://ror.org/04j7b2v61","country_code":"CN","type":"education","lineage":["https://openalex.org/I21642278"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Libo Liu","raw_affiliation_strings":["School of Information Engineering, Ningxia University,Yinchuan,China"],"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Ningxia University,Yinchuan,China","institution_ids":["https://openalex.org/I21642278"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100782372"],"corresponding_institution_ids":["https://openalex.org/I21642278"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30871106,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.00430000014603138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.003700000001117587,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.7239000201225281},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5576000213623047},{"id":"https://openalex.org/keywords/knowledge-representation-and-reasoning","display_name":"Knowledge representation and reasoning","score":0.5009999871253967},{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.48249998688697815},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4595000147819519},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.41100001335144043},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.40799999237060547},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.39419999718666077},{"id":"https://openalex.org/keywords/knowledge-based-systems","display_name":"Knowledge-based systems","score":0.387800008058548},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3799999952316284}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8309999704360962},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.7239000201225281},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5932000279426575},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5598000288009644},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5576000213623047},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.5009999871253967},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.48249998688697815},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4595000147819519},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.41100001335144043},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.40799999237060547},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.39419999718666077},{"id":"https://openalex.org/C115925183","wikidata":"https://www.wikidata.org/wiki/Q1412694","display_name":"Knowledge-based systems","level":2,"score":0.387800008058548},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3799999952316284},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36309999227523804},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.36059999465942383},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.34119999408721924},{"id":"https://openalex.org/C56289545","wikidata":"https://www.wikidata.org/wiki/Q6423376","display_name":"Knowledge integration","level":3,"score":0.3287999927997589},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.31299999356269836},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.310699999332428},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C85407183","wikidata":"https://www.wikidata.org/wiki/Q1045785","display_name":"Semantic network","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.2809000015258789},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2565000057220459},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209220","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209220","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322877","display_name":"Natural Science Foundation of Ningxia Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W2077069816","https://openalex.org/W2964187781","https://openalex.org/W4393154896"],"related_works":[],"abstract_inverted_index":{"We":[0,164],"present":[1],"UniBind,":[2],"a":[3,78,97],"flexible":[4],"and":[5,14,53,87,94,122,139,149,151,160],"efficient":[6],"approach":[7],"that":[8,103],"integrates":[9,104],"extended":[10],"scene":[11,20,32,62,79,106],"graph":[12],"descriptions":[13],"image":[15,124],"captions":[16],"to":[17,40,114],"learn":[18],"comprehensive":[19],"knowledge.":[21],"Existing":[22],"works,":[23],"such":[24],"as":[25],"Structure-CLIP":[26],"[1],":[27],"enhance":[28,115],"structured":[29,120,155],"representations":[30,121],"with":[31,50,135],"graphs":[33],"but":[34],"often":[35],"remain":[36],"semantically":[37],"monolithic,":[38],"failing":[39],"incorporate":[41],"diverse":[42],"semantic":[43,117,125],"information":[44],"from":[45],"images":[46],"containing":[47],"numerous":[48],"objects":[49],"varying":[51],"backgrounds":[52],"conditions.":[54],"UniBind":[55],"addresses":[56],"this":[57],"limitation":[58],"by":[59,66],"learning":[60],"multiple":[61],"knowledge":[63,80,107],"and,":[64],"supported":[65],"large":[67,83,89],"language":[68,84,90],"models":[69,85,91,113],"(LLMs),":[70],"further":[71],"enriching":[72],"semantics.":[73],"Specifically,":[74],"we":[75],"1)":[76],"construct":[77],"base":[81,108],"using":[82],"(LLMs)":[86],"multi-modal":[88],"(Multi-modal":[92],"LLMs),":[93],"2)":[95],"propose":[96],"Multi-modal":[98],"Knowledge":[99],"Integration":[100],"Encoder":[101],"(MKIE)":[102],"the":[105,146,158],"into":[109],"contrastive":[110],"language-image":[111],"pretraining":[112],"deep":[116],"relationships":[118],"in":[119,141,154],"enrich":[123],"information.":[126],"Our":[127],"method":[128],"achieves":[129],"state-of-the-art":[130],"performance":[131],"(see":[132],"Fig.":[133],"1),":[134],"gains":[136],"of":[137],"2.1%":[138],"2.3%":[140],"cross-modal":[142],"image-text":[143],"retrieval":[144],"on":[145,157],"MSCOCO":[147],"dataset":[148],"1.5%":[150],"1.2%":[152],"improvements":[153],"representation":[156],"VG-Attribution":[159],"VG-Relation":[161],"datasets,":[162],"respectively.":[163],"release":[165],"our":[166],"code":[167],"at":[168],"https://github.com/HulkZh/UniBind.":[169]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-30T00:00:00"}
