{"id":"https://openalex.org/W4417528188","doi":"https://doi.org/10.1109/iccv51701.2025.00450","title":"Taming the Untamed: Graph-Based Knowledge Retrieval and Reasoning for MLLMs to Conquer the Unknown","display_name":"Taming the Untamed: Graph-Based Knowledge Retrieval and Reasoning for MLLMs to Conquer the Unknown","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4417528188","doi":"https://doi.org/10.1109/iccv51701.2025.00450"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.00450","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00450","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2506.17589","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100412555","display_name":"Bowen Wang","orcid":"https://orcid.org/0000-0003-0297-9008"},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Bowen Wang","raw_affiliation_strings":["Osaka University,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Osaka University,Japan","institution_ids":["https://openalex.org/I98285908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101618975","display_name":"Zhouqiang Jiang","orcid":"https://orcid.org/0009-0002-1304-4179"},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Zhouqiang Jiang","raw_affiliation_strings":["Osaka University,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Osaka University,Japan","institution_ids":["https://openalex.org/I98285908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092151395","display_name":"Yasuaki Susumu","orcid":null},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yasuaki Susumu","raw_affiliation_strings":["Osaka University,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Osaka University,Japan","institution_ids":["https://openalex.org/I98285908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038384966","display_name":"Shotaro Miwa","orcid":null},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shotaro Miwa","raw_affiliation_strings":["Osaka University,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Osaka University,Japan","institution_ids":["https://openalex.org/I98285908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101534861","display_name":"Tianwei Chen","orcid":"https://orcid.org/0000-0002-2544-7744"},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tianwei Chen","raw_affiliation_strings":["Osaka University,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Osaka University,Japan","institution_ids":["https://openalex.org/I98285908"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107672040","display_name":"Yuta Nakashima","orcid":null},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"The University of Osaka","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuta Nakashima","raw_affiliation_strings":["Osaka University,Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Osaka University,Japan","institution_ids":["https://openalex.org/I98285908"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37335742,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4732","last_page":"4742"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8180999755859375,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8180999755859375,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.08560000360012054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.03759999945759773,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.589900016784668},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.585099995136261},{"id":"https://openalex.org/keywords/divide-and-conquer-algorithms","display_name":"Divide and conquer algorithms","score":0.5479000210762024},{"id":"https://openalex.org/keywords/multimodality","display_name":"Multimodality","score":0.4309999942779541},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.4090000092983246},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4059999883174896},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.3952000141143799}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7847999930381775},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.589900016784668},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.585099995136261},{"id":"https://openalex.org/C71559656","wikidata":"https://www.wikidata.org/wiki/Q671298","display_name":"Divide and conquer algorithms","level":2,"score":0.5479000210762024},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5238999724388123},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.4309999942779541},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.4090000092983246},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4059999883174896},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3952000141143799},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3682999908924103},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.36250001192092896},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.32109999656677246},{"id":"https://openalex.org/C20854674","wikidata":"https://www.wikidata.org/wiki/Q4386060","display_name":"Cognitive architecture","level":3,"score":0.298799991607666},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2946999967098236},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.29010000824928284},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2623000144958496},{"id":"https://openalex.org/C84685590","wikidata":"https://www.wikidata.org/wiki/Q1540472","display_name":"Knowledge engineering","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.25380000472068787},{"id":"https://openalex.org/C192327766","wikidata":"https://www.wikidata.org/wiki/Q1038799","display_name":"Cognitive robotics","level":3,"score":0.25189998745918274}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.00450","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00450","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2506.17589","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.17589","pdf_url":"https://arxiv.org/pdf/2506.17589","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2506.17589","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2506.17589","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2506.17589","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2506.17589","pdf_url":"https://arxiv.org/pdf/2506.17589","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"real":[1],"value":[2],"of":[3,85,130],"knowledge":[4,70,98,116],"lies":[5],"not":[6],"just":[7],"in":[8,12,37],"its":[9,13],"accumulation,":[10],"but":[11],"potential":[14],"to":[15,19,43,66,91,112],"be":[16],"harnessed":[17],"effectively":[18],"conquer":[20],"the":[21,64,93,128],"unknown.":[22],"Although":[23],"recent":[24],"multimodal":[25,32,69,137],"large":[26],"language":[27],"models":[28],"(MLLMs)":[29],"exhibit":[30],"impressing":[31],"capabilities,":[33],"they":[34],"often":[35],"fail":[36],"rarely":[38],"encountered":[39],"domain-specific":[40],"tasks":[41],"due":[42],"limited":[44],"relevant":[45,115],"knowledge.":[46],"To":[47],"explore":[48],"this,":[49],"we":[50,103],"adopt":[51],"visual":[52],"game":[53],"cognition":[54],"as":[55,63],"a":[56,68,83,105,110,133,142],"testbed":[57],"and":[58,76,100,140],"select":[59],"Monster":[60],"Hunter:":[61],"World":[62],"target":[65],"construct":[67],"graph":[71],"(MH-MMKG),":[72],"which":[73],"incorporates":[74],"multi-modalities":[75],"intricate":[77],"entity":[78],"relations.":[79],"We":[80],"also":[81],"design":[82],"series":[84],"challenging":[86],"queries":[87],"based":[88],"on":[89,136],"MH-MMKG":[90],"evaluate":[92],"models'":[94],"ability":[95],"for":[96,145],"complex":[97],"retrieval":[99],"reasoning.":[101],"Furthermore,":[102],"propose":[104],"multi-agent":[106],"retriever":[107],"that":[108,123],"enables":[109],"model":[111],"autonomously":[113],"search":[114],"without":[117],"additional":[118],"training.":[119],"Experimental":[120],"results":[121],"show":[122],"our":[124],"approach":[125],"significantly":[126],"enhances":[127],"performance":[129],"MLLMs,":[131],"providing":[132],"new":[134],"perspective":[135],"knowledge-augmented":[138],"reasoning":[139],"laying":[141],"solid":[143],"foundation":[144],"future":[146],"research.":[147]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
