{"id":"https://openalex.org/W4399554295","doi":"https://doi.org/10.48550/arxiv.2406.06316","title":"Tx-LLM: A Large Language Model for Therapeutics","display_name":"Tx-LLM: A Large Language Model for Therapeutics","publication_year":2024,"publication_date":"2024-06-10","ids":{"openalex":"https://openalex.org/W4399554295","doi":"https://doi.org/10.48550/arxiv.2406.06316"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2406.06316","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.06316","pdf_url":"https://arxiv.org/pdf/2406.06316","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.06316","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5099096923","display_name":"Juan Manuel Zambrano Chaves","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chaves, Juan Manuel Zambrano","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063203332","display_name":"E.-W. Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Eric","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059213795","display_name":"Tao Tu","orcid":"https://orcid.org/0000-0003-3420-7889"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tu, Tao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070999557","display_name":"Eeshit Dhaval Vaishnav","orcid":"https://orcid.org/0000-0003-3720-8051"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vaishnav, Eeshit Dhaval","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017402579","display_name":"B. Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Byron","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063201022","display_name":"S. Sara Mahdavi","orcid":"https://orcid.org/0000-0001-6823-598X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahdavi, S. Sara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010171106","display_name":"Christopher Semturs","orcid":"https://orcid.org/0000-0001-6108-2773"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Semturs, Christopher","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035123135","display_name":"David J. Fleet","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fleet, David","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103234563","display_name":"Vivek Natarajan","orcid":"https://orcid.org/0000-0001-7849-2074"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Natarajan, Vivek","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5047463591","display_name":"Shekoofeh Azizi","orcid":"https://orcid.org/0000-0002-7447-6031"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Azizi, Shekoofeh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5099096923"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.8500000238418579,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.8500000238418579,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.8313000202178955,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7886000275611877,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3515564799308777},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.33617740869522095}],"concepts":[{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3515564799308777},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.33617740869522095}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2406.06316","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.06316","pdf_url":"https://arxiv.org/pdf/2406.06316","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2406.06316","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2406.06316","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2406.06316","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.06316","pdf_url":"https://arxiv.org/pdf/2406.06316","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320317153","display_name":"DeepMind","ror":"https://ror.org/00971b260"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399554295.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Developing":[0],"therapeutics":[1],"is":[2,72,149],"a":[3,36,45,55,75,93,101,123,231],"lengthy":[4],"and":[5,16,141,152,197,201,211,228],"expensive":[6],"process":[7,23],"that":[8,80],"requires":[9],"the":[10,22,28,88,204,239],"satisfaction":[11],"of":[12,20,30,40,77,87,96,104,126,138,184,206],"many":[13],"different":[14],"criteria,":[15],"AI":[17,32],"models":[18],"capable":[19],"expediting":[21],"would":[24],"be":[25],"invaluable.":[26],"However,":[27],"majority":[29],"current":[31],"approaches":[33],"address":[34],"only":[35],"narrowly":[37],"defined":[38],"set":[39,95],"tasks,":[41],"often":[42],"circumscribed":[43],"within":[44],"particular":[46],"domain.":[47],"To":[48],"bridge":[49],"this":[50],"gap,":[51],"we":[52,202],"introduce":[53],"Tx-LLM,":[54],"generalist":[56],"large":[57],"language":[58],"model":[59,207],"(LLM)":[60],"fine-tuned":[61],"from":[62],"PaLM-2":[63],"which":[64],"encodes":[65],"knowledge":[66,227],"about":[67],"diverse":[68,190],"therapeutic":[69],"modalities.":[70],"Tx-LLM":[71,98,148,218],"trained":[73],"using":[74],"collection":[76],"709":[78],"datasets":[79],"target":[81],"66":[82,139],"tasks":[83,140,159,188,198],"spanning":[84],"various":[85],"stages":[86],"drug":[89,191,240],"discovery":[90,241],"pipeline.":[91,243],"Using":[92],"single":[94],"weights,":[97],"simultaneously":[99],"processes":[100],"wide":[102],"variety":[103],"chemical":[105],"or":[106,171],"biological":[107],"entities(small":[108],"molecules,":[109],"proteins,":[110],"nucleic":[111],"acids,":[112],"cell":[113,168],"lines,":[114],"diseases)":[115],"interleaved":[116],"with":[117,131,164,189],"free-text,":[118],"allowing":[119],"it":[120],"to":[121,176],"predict":[122],"broad":[124],"range":[125],"associated":[127],"properties,":[128],"achieving":[129],"competitive":[130],"state-of-the-art":[132],"(SOTA)":[133],"performance":[134,155],"on":[135,144,156,214],"43":[136],"out":[137],"exceeding":[142],"SOTA":[143],"22.":[145],"Among":[146],"these,":[147],"particularly":[150],"powerful":[151],"exceeds":[153],"best-in-class":[154],"average":[157],"for":[158],"combining":[160],"molecular":[161],"SMILES":[162],"representations":[163],"text":[165],"such":[166],"as":[167,234],"line":[169],"names":[170],"disease":[172],"names,":[173],"likely":[174],"due":[175],"context":[177],"learned":[178],"during":[179],"pretraining.":[180],"We":[181,216],"observe":[182],"evidence":[183],"positive":[185],"transfer":[186],"between":[187],"types":[192],"(e.g.,tasks":[193],"involving":[194,199],"small":[195],"molecules":[196],"proteins),":[200],"study":[203],"impact":[205],"size,":[208],"domain":[209],"finetuning,":[210],"prompting":[212],"strategies":[213],"performance.":[215],"believe":[217],"represents":[219],"an":[220,235],"important":[221],"step":[222],"towards":[223],"LLMs":[224],"encoding":[225],"biochemical":[226],"could":[229],"have":[230],"future":[232],"role":[233],"end-to-end":[236],"tool":[237],"across":[238],"development":[242]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":4}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
