{"id":"https://openalex.org/W4404344192","doi":"https://doi.org/10.48550/arxiv.2410.22296","title":"Generalists vs. Specialists: Evaluating LLMs on Highly-Constrained Biophysical Sequence Optimization Tasks","display_name":"Generalists vs. Specialists: Evaluating LLMs on Highly-Constrained Biophysical Sequence Optimization Tasks","publication_year":2024,"publication_date":"2024-10-29","ids":{"openalex":"https://openalex.org/W4404344192","doi":"https://doi.org/10.48550/arxiv.2410.22296"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.22296","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.22296","pdf_url":"https://arxiv.org/pdf/2410.22296","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.22296","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102736359","display_name":"Samuel C. Stanton","orcid":"https://orcid.org/0000-0003-1664-2465"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Stanton, Samuel","raw_affiliation_strings":["Genentech"],"affiliations":[{"raw_affiliation_string":"Genentech","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101278232","display_name":"Angelica Chen","orcid":"https://orcid.org/0000-0002-1744-3209"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Angelica","raw_affiliation_strings":["Genentech"],"affiliations":[{"raw_affiliation_string":"Genentech","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5102736359"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9466000199317932,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9466000199317932,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.6910009384155273},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.40705326199531555},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.35047242045402527},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.32194966077804565},{"id":"https://openalex.org/keywords/biochemistry","display_name":"Biochemistry","score":0.14280807971954346}],"concepts":[{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.6910009384155273},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.40705326199531555},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.35047242045402527},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.32194966077804565},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.14280807971954346}],"mesh":[],"locations_count":4,"locations":[{"id":"pmh:oai:arXiv.org:2410.22296","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.22296","pdf_url":"https://arxiv.org/pdf/2410.22296","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.22296","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.22296","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"doi:10.5281/zenodo.14926733","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14926733","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"},{"id":"doi:10.5281/zenodo.14926734","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14926734","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.22296","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.22296","pdf_url":"https://arxiv.org/pdf/2410.22296","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4391375266","https://openalex.org/W2082860237","https://openalex.org/W2119695867","https://openalex.org/W2130076355","https://openalex.org/W1990804418","https://openalex.org/W1993764875","https://openalex.org/W2046158694","https://openalex.org/W2788277189"],"abstract_inverted_index":{"Although":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"have":[5],"shown":[6],"promise":[7],"in":[8],"biomolecule":[9],"optimization":[10,74,99],"problems,":[11],"they":[12],"incur":[13,169],"heavy":[14],"computational":[15],"costs":[16],"and":[17,33,51,148,168],"struggle":[18,81,149],"to":[19,47,82,120],"satisfy":[20],"precise":[21],"constraints.":[22],"On":[23],"the":[24,68],"other":[25],"hand,":[26],"specialized":[27,163],"solvers":[28,164],"like":[29],"LaMBO-2":[30,135],"offer":[31],"efficiency":[32],"fine-grained":[34],"control":[35],"but":[36,125,162],"require":[37],"more":[38],"domain":[39],"expertise.":[40],"Comparing":[41],"these":[42],"approaches":[43],"is":[44],"challenging":[45],"due":[46],"expensive":[48],"laboratory":[49],"validation":[50],"inadequate":[52],"synthetic":[53,63],"benchmarks.":[54],"We":[55],"address":[56],"this":[57],"by":[58],"introducing":[59],"Ehrlich":[60,84,123,139],"functions,":[61,124],"a":[62,97,108],"test":[64],"suite":[65],"that":[66],"captures":[67],"geometric":[69],"structure":[70],"of":[71],"biophysical":[72],"sequence":[73],"problems.":[75],"With":[76],"prompting":[77],"alone,":[78],"off-the-shelf":[79],"LLMs":[80,142,156],"optimize":[83],"functions.":[85],"In":[86],"response,":[87],"we":[88,113],"propose":[89],"LLOME":[90,115],"(Language":[91],"Model":[92],"Optimization":[93],"with":[94,107],"Margin":[95],"Expectation),":[96],"bilevel":[98],"routine":[100],"for":[101],"online":[102],"black-box":[103],"optimization.":[104],"When":[105],"combined":[106],"novel":[109],"preference":[110],"learning":[111],"loss,":[112],"find":[114],"can":[116,126,157],"not":[117],"only":[118],"learn":[119],"solve":[121],"some":[122,145],"even":[127],"perform":[128],"as":[129,131],"well":[130],"or":[132],"better":[133],"than":[134],"on":[136],"moderately":[137],"difficult":[138],"variants.":[140],"However,":[141],"also":[143],"exhibit":[144],"likelihood-reward":[146],"miscalibration":[147],"without":[150],"explicit":[151],"rewards.":[152],"Our":[153],"results":[154],"indicate":[155],"occasionally":[158],"provide":[159],"significant":[160],"benefits,":[161],"are":[165],"still":[166],"competitive":[167],"less":[170],"overhead.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2024-11-14T00:00:00"}
