{"id":"https://openalex.org/W4409150186","doi":"https://doi.org/10.1145/3690624.3709291","title":"GROOT: Effective Design of Biological Sequences with Limited Experimental Data","display_name":"GROOT: Effective Design of Biological Sequences with Limited Experimental Data","publication_year":2025,"publication_date":"2025-04-04","ids":{"openalex":"https://openalex.org/W4409150186","doi":"https://doi.org/10.1145/3690624.3709291"},"language":"en","primary_location":{"id":"doi:10.1145/3690624.3709291","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3690624.3709291","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3690624.3709291","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5105208592","display_name":"Thanh Tran","orcid":"https://orcid.org/0000-0001-8663-1652"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Thanh V. T. Tran","raw_affiliation_strings":["FPT Software AI Center, Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT Software AI Center, Hanoi, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029125317","display_name":"Nhat Khang Ngo","orcid":"https://orcid.org/0009-0004-8762-9457"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Nhat Khang Ngo","raw_affiliation_strings":["FPT Software AI Center, Ho Chi Minh, Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT Software AI Center, Ho Chi Minh, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116907208","display_name":"Viet Anh Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Viet Anh Nguyen","raw_affiliation_strings":["FPT Software AI Center, Hanoi, Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT Software AI Center, Hanoi, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073178563","display_name":"Truong Son Hy","orcid":"https://orcid.org/0000-0002-5092-3757"},"institutions":[{"id":"https://openalex.org/I32389192","display_name":"University of Alabama at Birmingham","ror":"https://ror.org/008s83205","country_code":"US","type":"education","lineage":["https://openalex.org/I32389192"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Truong Son Hy","raw_affiliation_strings":["University of Alabama at Birmingham, Birmingham, AL, USA"],"affiliations":[{"raw_affiliation_string":"University of Alabama at Birmingham, Birmingham, AL, USA","institution_ids":["https://openalex.org/I32389192"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5105208592"],"corresponding_institution_ids":["https://openalex.org/I109689652"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0718226,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1385","last_page":"1396"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11048","display_name":"Bacteriophages and microbial interactions","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6528617739677429},{"id":"https://openalex.org/keywords/biological-data","display_name":"Biological data","score":0.44587448239326477},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.14296093583106995},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.10585886240005493}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6528617739677429},{"id":"https://openalex.org/C201797286","wikidata":"https://www.wikidata.org/wiki/Q4914986","display_name":"Biological data","level":2,"score":0.44587448239326477},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.14296093583106995},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.10585886240005493}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3690624.3709291","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3690624.3709291","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3690624.3709291","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3690624.3709291","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1992613346","https://openalex.org/W2051434435","https://openalex.org/W2060588922","https://openalex.org/W2064164319","https://openalex.org/W2132202037","https://openalex.org/W2379594833","https://openalex.org/W2519539312","https://openalex.org/W2529996553","https://openalex.org/W2767044445","https://openalex.org/W2949676527","https://openalex.org/W2999905431","https://openalex.org/W3127426316","https://openalex.org/W3185760697","https://openalex.org/W4224044437","https://openalex.org/W4225900888","https://openalex.org/W4297240271","https://openalex.org/W4327550249","https://openalex.org/W4389128743","https://openalex.org/W4392518427"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Latent":[0,87],"space":[1,31],"optimization":[2,43,161],"(LSO)":[3],"is":[4,25,55],"a":[5,29,37,85],"powerful":[6],"method":[7],"for":[8,89,98],"designing":[9],"discrete,":[10],"high-dimensional":[11],"biological":[12,155],"sequences":[13],"that":[14,176],"maximize":[15],"expensive":[16],"black-box":[17,187],"functions,":[18],"such":[19],"as":[20,57],"wet":[21],"lab":[22],"experiments.":[23],"This":[24],"accomplished":[26],"by":[27,82,113],"learning":[28],"latent":[30,104],"from":[32,146,171],"available":[33],"data":[34,54,63,76],"and":[35,111,119,163,165,179,198],"using":[36],"surrogate":[38],"model":[39],"f\u03a6":[40,59],"to":[41,67,127,129,186],"guide":[42],"algorithms":[44],"toward":[45],"optimal":[46],"outputs.":[47],"However,":[48],"existing":[49,181],"methods":[50,182],"struggle":[51],"when":[52],"labeled":[53,62,193],"limited,":[56],"training":[58,75,103,133,148],"with":[60,168],"few":[61],"points":[64],"can":[65],"lead":[66],"subpar":[68],"outputs,":[69],"offering":[70],"no":[71],"advantage":[72],"over":[73],"the":[74,102,132,147],"itself.":[77],"We":[78,150,200],"address":[79],"this":[80],"challenge":[81],"introducing":[83],"GROOT,":[84],"GRaph-based":[86],"SmOOThing":[88],"Biological":[90],"Sequence":[91],"Optimization.":[92],"In":[93],"particular,":[94],"GROOT":[95,152,177],"generates":[96],"pseudo-labels":[97,107],"neighbors":[99],"sampled":[100],"around":[101],"embeddings.":[105],"These":[106],"are":[108],"then":[109],"refined":[110],"smoothed":[112],"Label":[114],"Propagation.":[115],"Additionally,":[116],"we":[117],"theoretically":[118],"empirically":[120],"justify":[121],"our":[122,202],"approach,":[123],"demonstrate":[124,175],"GROOT's":[125],"ability":[126],"extrapolate":[128],"regions":[130],"beyond":[131],"set":[134],"while":[135],"maintaining":[136],"reliability":[137],"within":[138],"an":[139],"upper":[140],"bound":[141],"of":[142,192],"their":[143],"expected":[144],"distances":[145],"regions.":[149],"evaluate":[151],"on":[153],"various":[154],"sequence":[156],"design":[157],"tasks,":[158],"including":[159],"protein":[160],"(GFP":[162],"AAV)":[164],"three":[166],"tasks":[167],"exact":[169],"oracles":[170,188],"Design-Bench.":[172],"The":[173],"results":[174],"equalizes":[178],"surpasses":[180],"without":[183],"requiring":[184],"access":[185],"or":[189],"vast":[190],"amounts":[191],"data,":[194],"highlighting":[195],"its":[196],"practicality":[197],"effectiveness.":[199],"release":[201],"code":[203],"at":[204],"https://github.com/Fsoft-AIC/GROOT.":[205]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
