{"id":"https://openalex.org/W4406259721","doi":"https://doi.org/10.1109/bibm62325.2024.10822695","title":"TourSynbio: A Multi-Modal Large Model and Agent Framework to Bridge Text and Protein Sequences for Protein Engineering","display_name":"TourSynbio: A Multi-Modal Large Model and Agent Framework to Bridge Text and Protein Sequences for Protein Engineering","publication_year":2024,"publication_date":"2024-12-03","ids":{"openalex":"https://openalex.org/W4406259721","doi":"https://doi.org/10.1109/bibm62325.2024.10822695"},"language":"en","primary_location":{"id":"doi:10.1109/bibm62325.2024.10822695","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm62325.2024.10822695","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072622467","display_name":"Yiqing Shen","orcid":"https://orcid.org/0000-0001-7866-3339"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yiqing Shen","raw_affiliation_strings":["Toursun Synbio,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Toursun Synbio,Shanghai,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045107267","display_name":"Zan Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zan Chen","raw_affiliation_strings":["Toursun Synbio,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Toursun Synbio,Shanghai,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107421506","display_name":"Michail Mamalakis","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Michail Mamalakis","raw_affiliation_strings":["University of Cambridge,Department of Computer Science and Technology,Cambridge,UK"],"affiliations":[{"raw_affiliation_string":"University of Cambridge,Department of Computer Science and Technology,Cambridge,UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108154016","display_name":"Yungeng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yungeng Liu","raw_affiliation_strings":["Toursun Synbio,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Toursun Synbio,Shanghai,China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082365525","display_name":"Tianbin Li","orcid":"https://orcid.org/0000-0003-2167-586X"},"institutions":[{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianbin Li","raw_affiliation_strings":["Shanghai AI Laboratory,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Shanghai AI Laboratory,Shanghai,China","institution_ids":["https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111335419","display_name":"Yanzhou Su","orcid":null},"institutions":[{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanzhou Su","raw_affiliation_strings":["Shanghai AI Laboratory,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Shanghai AI Laboratory,Shanghai,China","institution_ids":["https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056327997","display_name":"Junjun He","orcid":"https://orcid.org/0000-0002-1813-1784"},"institutions":[{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junjun He","raw_affiliation_strings":["Shanghai AI Laboratory,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Shanghai AI Laboratory,Shanghai,China","institution_ids":["https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056748708","display_name":"P\u00edetro Li\u00f3","orcid":"https://orcid.org/0000-0002-0540-5053"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pietro Li\u00f2","raw_affiliation_strings":["University of Cambridge,Department of Computer Science and Technology,Cambridge,UK"],"affiliations":[{"raw_affiliation_string":"University of Cambridge,Department of Computer Science and Technology,Cambridge,UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070236689","display_name":"Yu Guang Wang","orcid":"https://orcid.org/0000-0002-7450-0273"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu Guang Wang","raw_affiliation_strings":["Toursun Synbio,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Toursun Synbio,Shanghai,China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5072622467"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2503,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.80359093,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2382","last_page":"2389"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13937","display_name":"Genetics, Bioinformatics, and Biomedical Research","score":0.8012999892234802,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T13937","display_name":"Genetics, Bioinformatics, and Biomedical Research","score":0.8012999892234802,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.7634000182151794,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.7515000104904175,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6268556714057922},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6243042349815369},{"id":"https://openalex.org/keywords/protein-engineering","display_name":"Protein engineering","score":0.6152903437614441},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.5934417843818665},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.13750532269477844},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.12275934219360352},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.09904277324676514},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.06770500540733337},{"id":"https://openalex.org/keywords/anatomy","display_name":"Anatomy","score":0.05843484401702881}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6268556714057922},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6243042349815369},{"id":"https://openalex.org/C147816474","wikidata":"https://www.wikidata.org/wiki/Q169525","display_name":"Protein engineering","level":3,"score":0.6152903437614441},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.5934417843818665},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.13750532269477844},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.12275934219360352},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.09904277324676514},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.06770500540733337},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.05843484401702881},{"id":"https://openalex.org/C181199279","wikidata":"https://www.wikidata.org/wiki/Q8047","display_name":"Enzyme","level":2,"score":0.0},{"id":"https://openalex.org/C46141821","wikidata":"https://www.wikidata.org/wiki/Q209402","display_name":"Nuclear magnetic resonance","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bibm62325.2024.10822695","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm62325.2024.10822695","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2061755516","https://openalex.org/W2785273668","https://openalex.org/W2892113269","https://openalex.org/W2943495267","https://openalex.org/W2995514860","https://openalex.org/W3094967361","https://openalex.org/W3104537585","https://openalex.org/W3156278083","https://openalex.org/W3179485843","https://openalex.org/W3193271391","https://openalex.org/W3211024740","https://openalex.org/W4213149192","https://openalex.org/W4223581484","https://openalex.org/W4285294723","https://openalex.org/W4296032638","https://openalex.org/W4366990368","https://openalex.org/W4386942538","https://openalex.org/W4387303685","https://openalex.org/W4388694364","https://openalex.org/W4392095606","https://openalex.org/W4393552888","https://openalex.org/W4396787690","https://openalex.org/W4400978157","https://openalex.org/W4406261070","https://openalex.org/W6782879696","https://openalex.org/W6848511598","https://openalex.org/W6849590751","https://openalex.org/W6854177335","https://openalex.org/W6854866820","https://openalex.org/W6856800273","https://openalex.org/W6858023062","https://openalex.org/W6858453470","https://openalex.org/W6860447507","https://openalex.org/W6860504479","https://openalex.org/W6861854292","https://openalex.org/W6862264409","https://openalex.org/W6862659309","https://openalex.org/W6862837114","https://openalex.org/W6869478886"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W1501776718","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2615136228","https://openalex.org/W2376932109"],"abstract_inverted_index":{"The":[0,115,263],"structural":[1],"similarities":[2,66],"between":[3,67],"protein":[4,39,50,56,68,96,101,135,166,180,188,201,244],"sequences":[5,69],"and":[6,70,77,119,134,140,190,220,233,260,270],"natural":[7,33,71],"languages":[8],"have":[9,25,46],"led":[10],"to":[11,61,110,251],"parallel":[12],"advancements":[13],"in":[14,29,38,74,199,247],"deep":[15,196],"learning":[16,197],"across":[17],"both":[18],"domains.":[19],"While":[20],"large":[21,91],"language":[22,34],"models":[23,198],"(LLMs)":[24],"achieved":[26],"much":[27],"progress":[28],"the":[30,64,88,150,200,216],"domain":[31],"of":[32,132,154,177,218],"processing,":[35],"their":[36],"potential":[37],"engineering":[40,97,181,202,245],"remains":[41],"largely":[42],"unexplored.":[43],"Previous":[44],"approaches":[45],"equipped":[47],"LLMs":[48,106],"with":[49,160],"understanding":[51,168],"capabilities":[52],"by":[53],"incorporating":[54],"external":[55,100],"encoders,":[57],"but":[58],"this":[59,83,241],"fails":[60],"fully":[62],"leverage":[63],"inherent":[65],"languages,":[72],"resulting":[73],"sub-optimal":[75],"performance":[76],"increased":[78,261],"model":[79,92,116,264],"complexity.":[80],"To":[81],"address":[82],"gap,":[84],"we":[85,170,214],"present":[86],"TourSynbio-7B,":[87],"first":[89],"multi-modal":[90],"specifically":[93],"designed":[94],"for":[95,137,143,210],"tasks":[98,246],"without":[99],"encoders.":[102],"TourSynbio-7B":[103,219],"demonstrates":[104],"that":[105,240],"can":[107],"inherently":[108],"learn":[109],"understand":[111],"proteins":[112],"as":[113],"language.":[114],"is":[117],"post-trained":[118],"instruction":[120],"fine-tuned":[121],"on":[122,149,228],"InternLM2-7B":[123],"using":[124],"ProteinLM-Dataset,":[125],"a":[126,152,205],"dataset":[127],"comprising":[128],"17.46":[129],"billion":[130],"tokens":[131],"text":[133],"sequence":[136,167],"self-supervised":[138],"pretraining":[139],"893K":[141],"instructions":[142],"supervised":[144],"fine-tuning.":[145],"TourSynbio7B":[146],"outperforms":[147],"GPT-4":[148],"ProteinLMBench,":[151],"benchmark":[153],"944":[155],"manually":[156],"verified":[157],"multiple-choice":[158],"questions,":[159],"62.18%":[161],"accuracy.":[162],"Leveraging":[163],"TourSynbio-7B\u2019s":[164],"enhanced":[165],"capability,":[169],"introduce":[171],"TourSynbioAgent,":[172],"an":[173],"innovative":[174],"framework":[175],"capable":[176],"performing":[178],"various":[179],"tasks,":[182],"including":[183],"mutation":[184],"analysis,":[185],"inverse":[186],"folding,":[187,189],"visualization.":[191],"TourSynbio-Agent":[192,221],"integrates":[193],"previously":[194],"disconnected":[195],"domain,":[203],"offering":[204],"unified":[206],"conversational":[207],"user":[208],"interface":[209],"improved":[211,255],"usability.":[212],"Finally,":[213],"demonstrate":[215],"efficacy":[217],"through":[222],"two":[223],"wet":[224,248],"lab":[225],"case":[226],"studies":[227],"vanilla":[229],"key":[230],"enzyme":[231],"modification":[232],"steroid":[234],"compound":[235],"catalysis.":[236],"Our":[237],"results":[238],"show":[239],"combination":[242],"facilitates":[243],"labs,":[249],"leading":[250],"higher":[252],"positive":[253],"rates,":[254],"mutations,":[256],"shorter":[257],"delivery":[258],"times,":[259],"automation.":[262],"weights":[265],"are":[266],"available":[267],"at":[268,272],"https://huggingface.co/tsynbio/Toursynbio":[269],"codes":[271],"https://github.com/tsynbio/TourSynbio.":[273]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
