{"id":"https://openalex.org/W7147708817","doi":"https://doi.org/10.48550/arxiv.2603.28474","title":"CiQi-Agent: Aligning Vision, Tools and Aesthetics in Multimodal Agent for Cultural Reasoning on Chinese Porcelains","display_name":"CiQi-Agent: Aligning Vision, Tools and Aesthetics in Multimodal Agent for Cultural Reasoning on Chinese Porcelains","publication_year":2026,"publication_date":"2026-03-30","ids":{"openalex":"https://openalex.org/W7147708817","doi":"https://doi.org/10.48550/arxiv.2603.28474"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.28474","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28474","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.28474","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122913535","display_name":"Wenhan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Wenhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132654073","display_name":"Zhixiang Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Zhixiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132719355","display_name":"Zhongtian Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Zhongtian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132717524","display_name":"Yanzhu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yanzhu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132732344","display_name":"Ziyu Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Ziyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132724091","display_name":"Hao Sheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sheng, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132631814","display_name":"Pengfei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Pengfei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132691627","display_name":"Honglin Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Honglin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132612833","display_name":"Wenqi Shao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shao, Wenqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132656525","display_name":"Qiaosheng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Qiaosheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132570358","display_name":"Yu Qiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4083999991416931,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4083999991416931,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12650","display_name":"Aesthetic Perception and Analysis","score":0.12919999659061432,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.0982000008225441,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/antique","display_name":"Antique","score":0.703499972820282},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4212999939918518},{"id":"https://openalex.org/keywords/affordance","display_name":"Affordance","score":0.40220001339912415},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.3840000033378601},{"id":"https://openalex.org/keywords/cultural-heritage","display_name":"Cultural heritage","score":0.35850000381469727}],"concepts":[{"id":"https://openalex.org/C2780376419","wikidata":"https://www.wikidata.org/wiki/Q472760","display_name":"Antique","level":2,"score":0.703499972820282},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5206999778747559},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4311000108718872},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4212999939918518},{"id":"https://openalex.org/C194995250","wikidata":"https://www.wikidata.org/wiki/Q531136","display_name":"Affordance","level":2,"score":0.40220001339912415},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3840000033378601},{"id":"https://openalex.org/C60671577","wikidata":"https://www.wikidata.org/wiki/Q210272","display_name":"Cultural heritage","level":2,"score":0.35850000381469727},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.32519999146461487},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2578999996185303}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.28474","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28474","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.28474","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28474","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","score":0.7820563912391663,"id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,192],"connoisseurship":[1,62,101],"of":[2,42,156],"antique":[3,43],"Chinese":[4,44],"porcelain":[5,49,116],"demands":[6],"extensive":[7],"historical":[8],"expertise,":[9],"material":[10],"understanding,":[11],"and":[12,26,51,56,76,91,94,120,125,147,161,175,194,199],"aesthetic":[13],"sensitivity,":[14],"making":[15],"it":[16,82],"difficult":[17],"for":[18,39],"non-specialists":[19],"to":[20,97],"engage.":[21],"To":[22,103],"democratize":[23],"cultural-heritage":[24],"understanding":[25],"assist":[27],"expert":[28],"connoisseurship,":[29],"we":[30,107],"introduce":[31],"CiQi-Agent":[32,46,139,169],"--":[33],"a":[34,109,128,148,158],"domain-specific":[35],"Porcelain":[36],"Connoisseurship":[37],"Agent":[38],"intelligent":[40],"analysis":[41,63],"porcelain.":[45],"supports":[47],"multi-image":[48],"inputs":[50],"enables":[52],"vision":[53,159],"tool":[54,160],"invocation":[55],"multimodal":[57,162],"retrieval-augmented":[58],"generation,":[59],"performing":[60],"fine-grained":[61],"across":[64,178],"six":[65,137,180],"attributes:":[66],"dynasty,":[67],"reign":[68],"period,":[69],"kiln":[70],"site,":[71],"glaze":[72],"color,":[73],"decorative":[74],"motif,":[75],"vessel":[77],"shape.":[78],"Beyond":[79],"attribute":[80],"classification,":[81],"captures":[83],"subtle":[84],"visual":[85,93,122],"details,":[86],"retrieves":[87],"relevant":[88],"domain":[89],"knowledge,":[90],"integrates":[92,153],"textual":[95],"evidence":[96],"produce":[98],"coherent,":[99],"explainable":[100],"descriptions.":[102],"achieve":[104],"this":[105],"capability,":[106],"construct":[108],"large-scale,":[110],"expert-annotated":[111],"dataset":[112,195],"CiQi-VQA,":[113],"comprising":[114],"29,596":[115],"specimens,":[117],"51,553":[118],"images,":[119],"557,940":[121],"question--answering":[123],"pairs,":[124],"further":[126],"establish":[127],"comprehensive":[129],"benchmark":[130],"CiQi-Bench":[131],"aligned":[132],"with":[133],"the":[134],"previously":[135],"mentioned":[136],"attributes.":[138],"is":[140],"trained":[141],"through":[142],"supervised":[143],"fine-tuning,":[144],"reinforcement":[145],"learning,":[146],"tool-augmented":[149],"reasoning":[150],"framework":[151],"that":[152,168],"two":[154],"categories":[155],"tools:":[157],"retrieval":[163],"tools.":[164],"Experimental":[165],"results":[166],"show":[167],"(7B)":[170],"outperforms":[171],"all":[172,179],"competitive":[173],"open-":[174],"closed-source":[176],"models":[177],"attributes":[181],"on":[182,185],"CiQi-Bench,":[183],"achieving":[184],"average":[186],"12.2\\%":[187],"higher":[188],"accuracy":[189],"than":[190],"GPT-5.":[191],"model":[193],"have":[196],"been":[197],"released":[198],"are":[200],"publicly":[201],"available":[202],"at":[203],"https://huggingface.co/datasets/SII-Monument-Valley/CiQi-VQA.":[204]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-02T00:00:00"}
