{"id":"https://openalex.org/W7156786351","doi":"https://doi.org/10.48550/arxiv.2604.24029","title":"DeepTaxon: An Interpretable Retrieval-Augmented Multimodal Framework for Unified Species Identification and Discovery","display_name":"DeepTaxon: An Interpretable Retrieval-Augmented Multimodal Framework for Unified Species Identification and Discovery","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7156786351","doi":"https://doi.org/10.48550/arxiv.2604.24029"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.24029","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24029","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.24029","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134760947","display_name":"Jiawei Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiawei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134762332","display_name":"Ming Lei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei, Ming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134795631","display_name":"Yaning Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yaning","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134756970","display_name":"Xinyan Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Xinyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034405338","display_name":"Yuquan Le","orcid":"https://orcid.org/0000-0001-6283-9037"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Le, Yuquan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134759040","display_name":"Qiwei Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Qiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134761354","display_name":"Zhiwei Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Zhiwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134816189","display_name":"Zheqi Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Zheqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122887081","display_name":"Yuchen Ang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ang, Yuchen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021932264","display_name":"Zhe Quan","orcid":"https://orcid.org/0000-0003-2669-9190"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Quan, Zhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134123705","display_name":"Tat-Seng Chua","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chua, Tat-Seng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10895","display_name":"Species Distribution and Climate Change","score":0.5861999988555908,"subfield":{"id":"https://openalex.org/subfields/2302","display_name":"Ecological Modeling"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10895","display_name":"Species Distribution and Climate Change","score":0.5861999988555908,"subfield":{"id":"https://openalex.org/subfields/2302","display_name":"Ecological Modeling"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12859","display_name":"Cell Image Analysis Techniques","score":0.05559999868273735,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13435","display_name":"Animal and Plant Science Education","score":0.03819999843835831,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6011000275611877},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5462999939918518},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.42010000348091125},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.34119999408721924},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.3181000053882599},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.30239999294281006},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.3003999888896942}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6805999875068665},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6011000275611877},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5462999939918518},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5450000166893005},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4699000120162964},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.42010000348091125},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3418000042438507},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.34119999408721924},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3386000096797943},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.30239999294281006},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.3001999855041504},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.28380000591278076},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.2615000009536743},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.2597000002861023}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.24029","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24029","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.24029","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24029","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Life in Land","score":0.4006552994251251,"id":"https://metadata.un.org/sdg/15"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Identifying":[0],"species":[1,15,57,77],"in":[2,16,23,189],"biology":[3],"among":[4],"tens":[5],"of":[6,8],"thousands":[7],"visually":[9],"similar":[10],"taxa":[11],"while":[12],"discovering":[13],"unknown":[14],"open-world":[17],"environments":[18],"remains":[19],"a":[20,51,69,84,130,178],"fundamental":[21],"challenge":[22],"biodiversity":[24,226],"research.":[25,227],"Current":[26],"methods":[27],"treat":[28],"identification":[29,58,191],"and":[30,41,59,87,114,182,192,205,215],"discovery":[31,42,60,95,133],"as":[32,96],"separate":[33],"problems,":[34],"with":[35,78,201],"classification":[36,131],"models":[37],"assuming":[38],"closed":[39],"sets":[40],"relying":[43],"on":[44,152,160,177],"threshold-based":[45],"rejection.":[46],"Here":[47],"we":[48,93],"present":[49],"DeepTaxon,":[50],"retrieval-augmented":[52,154],"multimodal":[53],"framework":[54,148],"that":[55,169],"unifies":[56],"through":[61],"interpretable":[62,223],"reasoning":[63],"over":[64],"retrieved":[65],"visual":[66],"evidence.":[67],"Given":[68],"query":[70],"image,":[71],"DeepTaxon":[72],"retrieves":[73],"the":[74,117,147],"top-$k$":[75],"candidate":[76,202],"$n$":[79],"exemplar":[80,206],"images":[81],"each":[82,126],"from":[83],"retrieval":[85,118,127,165,219],"index":[86,119],"performs":[88],"chain-of-thought":[89],"comparative":[90],"reasoning.":[91],"Critically,":[92],"redefine":[94],"an":[97,104,222],"explicit,":[98],"retrieval-based":[99],"decision":[100],"problem":[101],"rather":[102],"than":[103],"implicit":[105],"parametric":[106],"memory":[107],"problem.":[108],"A":[109],"sample":[110],"is":[111],"novel":[112],"if":[113,116],"only":[115],"lacks":[120],"sufficient":[121],"evidence":[122],"for":[123,142,225],"identification,":[124],"so":[125],"naturally":[128],"yields":[129],"or":[132],"label":[134],"without":[135],"manual":[136],"annotation,":[137],"thereby":[138],"providing":[139],"automatic":[140],"supervision":[141],"both":[143,190],"tasks.":[144],"We":[145],"train":[146],"via":[149],"supervised":[150],"fine-tuning":[151],"synthetic":[153],"data,":[155],"followed":[156],"by":[157],"reinforcement":[158],"learning":[159],"hard":[161],"samples,":[162],"converting":[163],"high-recall":[164],"into":[166],"high-precision":[167],"decisions":[168],"scale":[170],"to":[171,212],"massive":[172],"taxonomic":[173],"vocabularies.":[174],"Extensive":[175],"experiments":[176],"large-scale":[179],"in-distribution":[180],"benchmark":[181],"six":[183],"out-of-distribution":[184],"datasets":[185],"demonstrate":[186],"consistent":[187,216],"improvements":[188],"discovery.":[193],"Ablation":[194],"studies":[195],"further":[196],"reveal":[197],"effective":[198],"test-time":[199],"scaling":[200],"count":[203,207],"$k$":[204],"$n$,":[208],"strong":[209],"zero-shot":[210],"transfer":[211],"unseen":[213],"domains,":[214],"performance":[217],"across":[218],"encoders,":[220],"establishing":[221],"solution":[224]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-29T00:00:00"}
