{"id":"https://openalex.org/W7137968131","doi":"https://doi.org/10.1609/aaai.v40i4.37226","title":"Stop Mixing Things Up! BISCUIT Teaches Vision-Language Models to Learn New Concepts from Images on the Spot","display_name":"Stop Mixing Things Up! BISCUIT Teaches Vision-Language Models to Learn New Concepts from Images on the Spot","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137968131","doi":"https://doi.org/10.1609/aaai.v40i4.37226"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i4.37226","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37226","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37226/41188","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37226/41188","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109676444","display_name":"Jiahua Bao","orcid":"https://orcid.org/0000-0003-0610-4321"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiahua Bao","raw_affiliation_strings":["Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China\nNational Key Laboratory of Smart Farming Technology and Systems, China\nChina Mobile 5G Institute, China"],"affiliations":[{"raw_affiliation_string":"Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China\nNational Key Laboratory of Smart Farming Technology and Systems, China\nChina Mobile 5G Institute, China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129742804","display_name":"Siyao Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siyao Cheng","raw_affiliation_strings":["Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China\nNational Key Laboratory of Smart Farming Technology and Systems, China\nChina Mobile 5G Institute, China"],"affiliations":[{"raw_affiliation_string":"Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China\nNational Key Laboratory of Smart Farming Technology and Systems, China\nChina Mobile 5G Institute, China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113871275","display_name":"Jiaxing Du","orcid":null},"institutions":[{"id":"https://openalex.org/I4210102840","display_name":"Ubiquitous Energy (United States)","ror":"https://ror.org/01b06j989","country_code":"US","type":"company","lineage":["https://openalex.org/I4210102840"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiaxing Du","raw_affiliation_strings":["Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China","institution_ids":["https://openalex.org/I4210102840"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129733562","display_name":"Yuhang Jia","orcid":null},"institutions":[{"id":"https://openalex.org/I4210102840","display_name":"Ubiquitous Energy (United States)","ror":"https://ror.org/01b06j989","country_code":"US","type":"company","lineage":["https://openalex.org/I4210102840"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuhang Jia","raw_affiliation_strings":["Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China","institution_ids":["https://openalex.org/I4210102840"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129694997","display_name":"Boyang Niu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210102840","display_name":"Ubiquitous Energy (United States)","ror":"https://ror.org/01b06j989","country_code":"US","type":"company","lineage":["https://openalex.org/I4210102840"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Boyang Niu","raw_affiliation_strings":["Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China","institution_ids":["https://openalex.org/I4210102840"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125711910","display_name":"Zeming Lang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210102840","display_name":"Ubiquitous Energy (United States)","ror":"https://ror.org/01b06j989","country_code":"US","type":"company","lineage":["https://openalex.org/I4210102840"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zeming Lang","raw_affiliation_strings":["Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China","institution_ids":["https://openalex.org/I4210102840"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103203226","display_name":"Changjiang He","orcid":"https://orcid.org/0000-0003-4447-1130"},"institutions":[{"id":"https://openalex.org/I4210102840","display_name":"Ubiquitous Energy (United States)","ror":"https://ror.org/01b06j989","country_code":"US","type":"company","lineage":["https://openalex.org/I4210102840"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Changjiang He","raw_affiliation_strings":["Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China","institution_ids":["https://openalex.org/I4210102840"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129654853","display_name":"Hao Y. Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Zhang","raw_affiliation_strings":["Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China\nNational Key Laboratory of Smart Farming Technology and Systems, China\nChina Mobile 5G Institute, China"],"affiliations":[{"raw_affiliation_string":"Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China\nNational Key Laboratory of Smart Farming Technology and Systems, China\nChina Mobile 5G Institute, China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129648671","display_name":"Jie Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Liu","raw_affiliation_strings":["Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China\nNational Key Laboratory of Smart Farming Technology and Systems, China\nChina Mobile 5G Institute, China"],"affiliations":[{"raw_affiliation_string":"Research Center of Ubiquitous Computing and Intelligent Systems, Harbin Institute of Technology, China\nNational Key Laboratory of Smart Farming Technology and Systems, China\nChina Mobile 5G Institute, China","institution_ids":["https://openalex.org/I4210090176"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5109676444"],"corresponding_institution_ids":["https://openalex.org/I4210090176"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.22800587,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"4","first_page":"2408","last_page":"2416"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9435999989509583,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9435999989509583,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.0210999995470047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.0066999997943639755,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/unification","display_name":"Unification","score":0.5971999764442444},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5554999709129333},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5325000286102295},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4733999967575073},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4715999960899353},{"id":"https://openalex.org/keywords/mixing","display_name":"Mixing (physics)","score":0.4404999911785126},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4278999865055084},{"id":"https://openalex.org/keywords/grasp","display_name":"GRASP","score":0.4178999960422516}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7634999752044678},{"id":"https://openalex.org/C96146094","wikidata":"https://www.wikidata.org/wiki/Q609057","display_name":"Unification","level":2,"score":0.5971999764442444},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5554999709129333},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5325000286102295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5202999711036682},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4733999967575073},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4715999960899353},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing (physics)","level":2,"score":0.4404999911785126},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4278999865055084},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.4178999960422516},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.40689998865127563},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.37599998712539673},{"id":"https://openalex.org/C2777055276","wikidata":"https://www.wikidata.org/wiki/Q7936580","display_name":"Visual approach","level":2,"score":0.36070001125335693},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.3562000095844269},{"id":"https://openalex.org/C2777877512","wikidata":"https://www.wikidata.org/wiki/Q1116097","display_name":"Common ground","level":2,"score":0.33660000562667847},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.3328000009059906},{"id":"https://openalex.org/C16345878","wikidata":"https://www.wikidata.org/wiki/Q107472979","display_name":"Orientation (vector space)","level":2,"score":0.2985000014305115},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.29089999198913574},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27090001106262207},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2583000063896179},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i4.37226","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37226","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37226/41188","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i4.37226","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i4.37226","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37226/41188","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2476221792","display_name":null,"funder_award_id":"Heilongjiang","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3518449552","display_name":null,"funder_award_id":"61972114","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3537643952","display_name":null,"funder_award_id":"62350710797","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4020255992","display_name":null,"funder_award_id":"Project","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4729246649","display_name":null,"funder_award_id":"62106061","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5315742300","display_name":null,"funder_award_id":"2022ZX01A22","funder_id":"https://openalex.org/F4320317323","funder_display_name":"Key Research and Development Program of Heilongjiang"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8513333314","display_name":null,"funder_award_id":"2021ZD01","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8955107213","display_name":null,"funder_award_id":"Major","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320317323","display_name":"Key Research and Development Program of Heilongjiang","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137968131.pdf","grobid_xml":"https://content.openalex.org/works/W7137968131.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-Language":[0],"Models":[1],"(VLMs)":[2],"have":[3],"achieved":[4],"impressive":[5],"performance":[6,174],"across":[7,161,182],"various":[8],"tasks,":[9],"but":[10],"often":[11],"struggle":[12],"to":[13,43,72,125,171,200,214],"apply":[14,198],"newly":[15,146],"introduced":[16,147],"visual":[17,67,78,148,163],"concepts":[18,53,75,156,181],"during":[19],"inference.":[20],"A":[21],"common":[22],"failure":[23,42],"pattern":[24],"is":[25],"what":[26],"we":[27,90],"call":[28],"Mixing":[29],"Things":[30],"Up:":[31],"VLMs":[32,61,173],"frequently":[33],"confuse":[34],"concept":[35,46,127,184],"names,":[36],"resulting":[37],"in":[38],"vague":[39],"descriptions":[40],"and":[41,99,117,144,157,177,186,194,202,210,216],"ground":[44],"the":[45,70,141,145,212],"correctly.":[47],"Existing":[48,150],"approaches":[49],"mainly":[50,152],"address":[51],"person-related":[52],"through":[54,95,134],"text":[55,143],"prompts":[56],"or":[57,64],"tokenizer":[58],"modifications.":[59],"However,":[60],"still":[62],"miss":[63],"misinterpret":[65],"untrained":[66],"concepts,":[68],"underscoring":[69],"need":[71],"learn":[73],"new":[74],"directly":[76],"from":[77],"input,":[79],"without":[80],"relying":[81],"on":[82,154,175,227],"prior":[83],"textual":[84],"injection.":[85],"To":[86],"overcome":[87],"these":[88],"limitations,":[89],"propose":[91,167],"BISCUIT":[92,199,220],"(Basis-aligned":[93],"Inference":[94],"Structured":[96],"Concept":[97],"Unification":[98],"Identification-aware":[100],"Tuning),":[101],"a":[102,109,121,168],"two-step":[103],"training":[104],"method.":[105],"Step":[106,129],"I":[107],"proposes":[108],"dual-stream":[110],"structure-aware":[111],"vision":[112],"encoder":[113],"that":[114],"fuses":[115],"RGB":[116],"edge-based":[118],"embeddings":[119],"within":[120],"shared":[122],"basis":[123],"space":[124],"enhance":[126],"recognition.":[128],"II":[130],"enhances":[131],"generation":[132],"quality":[133],"identification-aware":[135],"tuning,":[136],"which":[137],"encourages":[138],"alignment":[139],"between":[140],"generated":[142],"concepts.":[149],"methods":[151],"focus":[153],"person":[155],"lack":[158],"comprehensive":[159],"evaluation":[160],"diverse":[162,183],"categories.":[164],"We":[165,197],"further":[166],"benchmark":[169],"BiscuitVQA":[170],"evaluate":[172],"recognizing":[176],"applying":[178],"novel":[179],"image-introduced":[180],"types":[185],"task":[187],"types,":[188],"including":[189],"real":[190],"people,":[191],"cartoons,":[192],"animals,":[193],"symbolic":[195],"content.":[196],"LLaVA-1.5":[201],"Qwen2.5-VL,":[203],"achieving":[204],"competitive":[205],"results":[206],"among":[207],"open-source":[208],"models":[209],"narrowing":[211],"gap":[213],"Gemini-2.5":[215],"GPT-4o.":[217],"Interestingly,":[218],"our":[219],"maintains":[221],"strong":[222],"generalization,":[223],"showing":[224],"minimal":[225],"degradation":[226],"other":[228],"downstream":[229],"tasks.":[230]},"counts_by_year":[],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2026-03-18T00:00:00"}
