{"id":"https://openalex.org/W7143405020","doi":"https://doi.org/10.48550/arxiv.2603.26128","title":"TaxaAdapter: Vision Taxonomy Models are Key to Fine-grained Image Generation over the Tree of Life","display_name":"TaxaAdapter: Vision Taxonomy Models are Key to Fine-grained Image Generation over the Tree of Life","publication_year":2026,"publication_date":"2026-03-27","ids":{"openalex":"https://openalex.org/W7143405020","doi":"https://doi.org/10.48550/arxiv.2603.26128"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.26128","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26128","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.26128","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059865613","display_name":"Mridul Khurana","orcid":"https://orcid.org/0009-0003-9346-3206"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Khurana, Mridul","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083442260","display_name":"Amin Karimi Monsefi","orcid":"https://orcid.org/0000-0002-6101-2828"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Monsefi, Amin Karimi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130993118","display_name":"Justin Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Justin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130954630","display_name":"Medha Sawhney","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sawhney, Medha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090386394","display_name":"David Carlyn","orcid":"https://orcid.org/0000-0002-8323-0359"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Carlyn, David","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044043575","display_name":"Julia Chae","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chae, Julia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044808270","display_name":"Jianyang Gu","orcid":"https://orcid.org/0000-0002-4060-7427"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Jianyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073535794","display_name":"Rajiv Ramnath","orcid":"https://orcid.org/0000-0003-0093-8560"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramnath, Rajiv","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130973987","display_name":"Sara Beery","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Beery, Sara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130961685","display_name":"Wei-Lun Chao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chao, Wei-Lun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081622450","display_name":"Anuj Karpatne","orcid":"https://orcid.org/0000-0003-1647-3534"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karpatne, Anuj","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130915667","display_name":"Cheng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Cheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5059865613"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10895","display_name":"Species Distribution and Climate Change","score":0.275299996137619,"subfield":{"id":"https://openalex.org/subfields/2302","display_name":"Ecological Modeling"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10895","display_name":"Species Distribution and Climate Change","score":0.275299996137619,"subfield":{"id":"https://openalex.org/subfields/2302","display_name":"Ecological Modeling"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.08250000327825546,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12859","display_name":"Cell Image Analysis Techniques","score":0.066600002348423,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.7389000058174133},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5935999751091003},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.5898000001907349},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.510200023651123},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.48820000886917114},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4846999943256378}],"concepts":[{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.7389000058174133},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7117000222206116},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6482999920845032},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5935999751091003},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.5898000001907349},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.510200023651123},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.48820000886917114},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4846999943256378},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4526999890804291},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.43860000371932983},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.36629998683929443},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3411000072956085},{"id":"https://openalex.org/C116222747","wikidata":"https://www.wikidata.org/wiki/Q220888","display_name":"Falsifiability","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C163797641","wikidata":"https://www.wikidata.org/wiki/Q2067937","display_name":"Tree structure","level":3,"score":0.28859999775886536},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2874000072479248},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.26499998569488525}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.26128","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26128","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.26128","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26128","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","display_name":"Life in Land","score":0.5836100578308105}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Accurately":[0],"generating":[1],"images":[2,185],"across":[3],"the":[4,28,40],"Tree":[5],"of":[6,19,156,183],"Life":[7],"is":[8],"difficult:":[9],"there":[10],"are":[11,198],"over":[12,98,118],"10M":[13],"distinct":[14],"species":[15,46,77,170,178,188,205],"on":[16],"Earth,":[17],"many":[18],"which":[20],"differ":[21],"only":[22,180],"by":[23],"subtle":[24],"visual":[25,42],"traits.":[26],"Despite":[27],"remarkable":[29],"progress":[30],"in":[31,172],"text-to-image":[32,87],"synthesis,":[33],"existing":[34],"models":[35],"often":[36],"fail":[37],"to":[38,74],"capture":[39],"fine-grained":[41,76,204],"cues":[43],"that":[44,65,109,142,163,196],"define":[45],"identity,":[47],"even":[48,187],"when":[49],"their":[50],"outputs":[51],"appear":[52],"photo-realistic.":[53],"To":[54,128],"this":[55],"end,":[56],"we":[57,133,161],"propose":[58],"TaxaAdapter,":[59],"a":[60,85,122,136,152,181,199],"simple":[61],"and":[62,104,115,125,148,186],"lightweight":[63],"approach":[64],"incorporates":[66],"Vision":[67],"Taxonomy":[68],"Models":[69],"(VTMs)":[70],"such":[71,100,175],"as":[72,101,176],"BioCLIP":[73],"guide":[75],"generation.":[78,206],"Our":[79],"method":[80],"injects":[81],"VTM":[82],"embeddings":[83],"into":[84],"frozen":[86],"diffusion":[88],"model,":[89],"improving":[90],"species-level":[91],"fidelity":[92,114],"while":[93],"preserving":[94],"flexible":[95],"text":[96],"control":[97],"attributes":[99],"pose,":[102],"style,":[103],"background.":[105],"Extensive":[106],"experiments":[107],"demonstrate":[108],"TaxaAdapter":[110,164],"consistently":[111],"improves":[112],"morphology":[113],"species-identity":[116],"accuracy":[117],"strong":[119,166],"baselines,":[120],"with":[121,179],"cleaner":[123],"architecture":[124],"training":[126,184],"recipe.":[127],"better":[129],"evaluate":[130],"these":[131],"improvements,":[132],"also":[134],"introduce":[135],"multimodal":[137],"Large":[138],"Language":[139],"Model-based":[140],"metric":[141],"summarizes":[143],"trait-level":[144],"descriptions":[145],"from":[146],"generated":[147],"real":[149],"images,":[150],"providing":[151],"more":[153],"interpretable":[154],"measure":[155],"morphological":[157],"consistency.":[158],"Beyond":[159],"this,":[160],"observe":[162],"exhibits":[165],"generalization":[167],"capabilities,":[168],"enabling":[169],"synthesis":[171],"challenging":[173],"regimes":[174],"few-shot":[177],"handful":[182],"unseen":[189],"during":[190],"training.":[191],"Overall,":[192],"our":[193],"results":[194],"highlight":[195],"VTMs":[197],"key":[200],"ingredient":[201],"for":[202],"scalable,":[203]},"counts_by_year":[],"updated_date":"2026-05-04T08:30:34.212998","created_date":"2026-03-31T00:00:00"}
