{"id":"https://openalex.org/W7136762320","doi":"https://doi.org/10.48550/arxiv.2603.12743","title":"MoKus: Leveraging Cross-Modal Knowledge Transfer for Knowledge-Aware Concept Customization","display_name":"MoKus: Leveraging Cross-Modal Knowledge Transfer for Knowledge-Aware Concept Customization","publication_year":2026,"publication_date":"2026-03-13","ids":{"openalex":"https://openalex.org/W7136762320","doi":"https://doi.org/10.48550/arxiv.2603.12743"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.12743","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12743","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.12743","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129480690","display_name":"Chenyang Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhu, Chenyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017617498","display_name":"Hongxiang Li","orcid":"https://orcid.org/0009-0000-7710-8835"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hongxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129494722","display_name":"Xiu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129552174","display_name":"Long Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Long","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5129480690"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6901000142097473,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6901000142097473,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.07500000298023224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.07429999858140945,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6064000129699707},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5819000005722046},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.5691999793052673},{"id":"https://openalex.org/keywords/knowledge-transfer","display_name":"Knowledge transfer","score":0.4918000102043152},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.438400000333786},{"id":"https://openalex.org/keywords/knowledge-acquisition","display_name":"Knowledge acquisition","score":0.43779999017715454},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4375},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.42820000648498535},{"id":"https://openalex.org/keywords/knowledge-representation-and-reasoning","display_name":"Knowledge representation and reasoning","score":0.41609999537467957}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8212000131607056},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6064000129699707},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5819000005722046},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.5691999793052673},{"id":"https://openalex.org/C2776960227","wikidata":"https://www.wikidata.org/wiki/Q2586354","display_name":"Knowledge transfer","level":2,"score":0.4918000102043152},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4505000114440918},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.438400000333786},{"id":"https://openalex.org/C2777220311","wikidata":"https://www.wikidata.org/wiki/Q6423340","display_name":"Knowledge acquisition","level":2,"score":0.43779999017715454},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4375},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.42820000648498535},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.41609999537467957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41029998660087585},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.35440000891685486},{"id":"https://openalex.org/C29804473","wikidata":"https://www.wikidata.org/wiki/Q2025711","display_name":"Open Knowledge Base Connectivity","level":4,"score":0.3458000123500824},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.33500000834465027},{"id":"https://openalex.org/C115925183","wikidata":"https://www.wikidata.org/wiki/Q1412694","display_name":"Knowledge-based systems","level":2,"score":0.31520000100135803},{"id":"https://openalex.org/C124469403","wikidata":"https://www.wikidata.org/wiki/Q1813993","display_name":"Procedural knowledge","level":3,"score":0.3077000081539154},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2872999906539917},{"id":"https://openalex.org/C84685590","wikidata":"https://www.wikidata.org/wiki/Q1540472","display_name":"Knowledge engineering","level":2,"score":0.2858000099658966},{"id":"https://openalex.org/C96711827","wikidata":"https://www.wikidata.org/wiki/Q17012245","display_name":"Entity linking","level":3,"score":0.27149999141693115},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2680000066757202},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.2606000006198883},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25999999046325684},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.25940001010894775},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.12743","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12743","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.12743","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12743","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Concept":[0,45],"customization":[1],"typically":[2],"binds":[3],"rare":[4,25,29],"tokens":[5,30],"to":[6,32,56,65,73,88,122,147,169,214,218,237],"a":[7,47,96,107],"target":[8,39,57,90,154],"concept.":[9,40,91,155],"Unfortunately,":[10],"these":[11,24,28],"approaches":[12],"often":[13],"suffer":[14],"from":[15],"unstable":[16],"performance":[17],"as":[18],"the":[19,34,38,63,67,70,79,85,89,117,123,144,149,153,163,166,170,185,190,208,232],"pretraining":[20],"data":[21],"seldom":[22],"contains":[23,133],"tokens.":[26],"Meanwhile,":[27,78],"fail":[31],"convey":[33],"inherent":[35],"knowledge":[36,55,68,87,111,115,159,167,210,242],"of":[37,152,234],"Consequently,":[41],"we":[42,93,141,161,188],"introduce":[43,189],"Knowledge-aware":[44],"Customization,":[46],"novel":[48,97],"task":[49,61],"aiming":[50],"at":[51],"binding":[52],"diverse":[53],"textual":[54,86,158],"visual":[58,124,138,150],"concepts.":[59],"This":[60],"requires":[62],"model":[64,80],"identify":[66],"within":[69,116],"text":[71,118],"prompt":[72],"perform":[74],"high-fidelity":[75,174],"customized":[76,175],"generation.":[77,127,176],"should":[81],"efficiently":[82],"bind":[83],"all":[84],"Therefore,":[92],"propose":[94],"MoKus,":[95],"framework":[98,104],"for":[99,165,193],"knowledge-aware":[100,194,220],"concept":[101,139,195,224,227],"customization.":[102],"Our":[103],"relies":[105],"on":[106,184,240],"key":[108],"observation:":[109],"cross-modal":[110],"transfer,":[112],"where":[113],"modifying":[114],"modality":[119,125],"naturally":[120],"transfers":[121],"during":[126],"Inspired":[128],"by":[129],"this":[130],"observation,":[131],"MoKus":[132,183,203,213],"two":[134],"stages:":[135],"(1)":[136],"In":[137,157],"learning,":[140],"first":[142,191],"learn":[143],"anchor":[145,171],"representation":[146],"store":[148],"information":[151],"(2)":[156],"updating,":[160],"update":[162],"answer":[164],"queries":[168],"representation,":[172],"enabling":[173],"To":[177],"further":[178],"comprehensively":[179],"evaluate":[180],"our":[181,235],"proposed":[182],"new":[186],"task,":[187],"benchmark":[192],"customization:":[196],"KnowCusBench.":[197],"Extensive":[198],"evaluations":[199],"have":[200],"demonstrated":[201],"that":[202],"outperforms":[204],"state-of-the-art":[205],"methods.":[206],"Moreover,":[207],"cross-model":[209],"transfer":[211],"allows":[212],"be":[215],"easily":[216],"extended":[217],"other":[219],"applications":[221],"like":[222],"virtual":[223],"creation":[225],"and":[226],"erasure.":[228],"We":[229],"also":[230],"demonstrate":[231],"capability":[233],"method":[236],"achieve":[238],"improvements":[239],"world":[241],"benchmarks.":[243]},"counts_by_year":[],"updated_date":"2026-03-17T07:05:13.627479","created_date":"2026-03-17T00:00:00"}
