{"id":"https://openalex.org/W6942309783","doi":"https://doi.org/10.14279/depositonce-19111","title":"Leveraging topological information in protein structure prediction","display_name":"Leveraging topological information in protein structure prediction","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W6942309783","doi":"https://doi.org/10.14279/depositonce-19111"},"language":"en","primary_location":{"id":"pmh:oai:depositonce.tu-berlin.de:11303/20313","is_oa":true,"landing_page_url":"https://depositonce.tu-berlin.de/handle/11303/20313","pdf_url":null,"source":{"id":"https://openalex.org/S4406922277","display_name":"DepositOnce","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"acceptedVersion"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://depositonce.tu-berlin.de/handle/11303/20313","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Mabrouk, Mahmoud","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mabrouk, Mahmoud","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.5405427,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10451","display_name":"Mycorrhizal Fungi and Plant Interactions","score":0.5026000142097473,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10451","display_name":"Mycorrhizal Fungi and Plant Interactions","score":0.5026000142097473,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.09950000047683716,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10825","display_name":"Plant Pathogens and Fungal Diseases","score":0.04149999842047691,"subfield":{"id":"https://openalex.org/subfields/1307","display_name":"Cell Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6474000215530396},{"id":"https://openalex.org/keywords/protein-structure","display_name":"Protein structure","score":0.5838000178337097},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5547999739646912},{"id":"https://openalex.org/keywords/protein-structure-prediction","display_name":"Protein structure prediction","score":0.5066999793052673},{"id":"https://openalex.org/keywords/protein-structure-database","display_name":"Protein structure database","score":0.49149999022483826},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4702000021934509},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4287000000476837},{"id":"https://openalex.org/keywords/protein-sequencing","display_name":"Protein sequencing","score":0.3968999981880188},{"id":"https://openalex.org/keywords/chemical-space","display_name":"Chemical space","score":0.36640000343322754}],"concepts":[{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6474000215530396},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6122000217437744},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.5838000178337097},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5547999739646912},{"id":"https://openalex.org/C18051474","wikidata":"https://www.wikidata.org/wiki/Q899656","display_name":"Protein structure prediction","level":3,"score":0.5066999793052673},{"id":"https://openalex.org/C136475424","wikidata":"https://www.wikidata.org/wiki/Q7251500","display_name":"Protein structure database","level":4,"score":0.49149999022483826},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4702000021934509},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4287000000476837},{"id":"https://openalex.org/C10010492","wikidata":"https://www.wikidata.org/wiki/Q3142557","display_name":"Protein sequencing","level":4,"score":0.3968999981880188},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3804999887943268},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36730000376701355},{"id":"https://openalex.org/C99726746","wikidata":"https://www.wikidata.org/wiki/Q906396","display_name":"Chemical space","level":3,"score":0.36640000343322754},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3610000014305115},{"id":"https://openalex.org/C204328495","wikidata":"https://www.wikidata.org/wiki/Q847556","display_name":"Protein folding","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3456000089645386},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34380000829696655},{"id":"https://openalex.org/C191120209","wikidata":"https://www.wikidata.org/wiki/Q908902","display_name":"Structural biology","level":2,"score":0.33239999413490295},{"id":"https://openalex.org/C200307862","wikidata":"https://www.wikidata.org/wiki/Q7797175","display_name":"Threading (protein sequence)","level":3,"score":0.3122999966144562},{"id":"https://openalex.org/C147816474","wikidata":"https://www.wikidata.org/wiki/Q169525","display_name":"Protein engineering","level":3,"score":0.2980000078678131},{"id":"https://openalex.org/C4668613","wikidata":"https://www.wikidata.org/wiki/Q4116110","display_name":"Structural alignment","level":5,"score":0.2944999933242798},{"id":"https://openalex.org/C152769699","wikidata":"https://www.wikidata.org/wiki/Q410814","display_name":"Protein design","level":3,"score":0.287200003862381},{"id":"https://openalex.org/C2777936996","wikidata":"https://www.wikidata.org/wiki/Q17148461","display_name":"Conformational ensembles","level":3,"score":0.2840000092983246},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2838999927043915},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C119145174","wikidata":"https://www.wikidata.org/wiki/Q7251429","display_name":"Protein Data Bank","level":3,"score":0.2709999978542328},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.2669999897480011},{"id":"https://openalex.org/C144292202","wikidata":"https://www.wikidata.org/wiki/Q898273","display_name":"Protein domain","level":3,"score":0.26190000772476196},{"id":"https://openalex.org/C178180057","wikidata":"https://www.wikidata.org/wiki/Q7251477","display_name":"Protein superfamily","level":3,"score":0.25760000944137573},{"id":"https://openalex.org/C69131567","wikidata":"https://www.wikidata.org/wiki/Q2068215","display_name":"Structural bioinformatics","level":3,"score":0.2549999952316284},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.25459998846054077},{"id":"https://openalex.org/C45484198","wikidata":"https://www.wikidata.org/wiki/Q827246","display_name":"Sequence alignment","level":4,"score":0.2533000111579895},{"id":"https://openalex.org/C30711495","wikidata":"https://www.wikidata.org/wiki/Q289411","display_name":"Sequence space","level":3,"score":0.2515999972820282}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:depositonce.tu-berlin.de:11303/20313","is_oa":true,"landing_page_url":"https://depositonce.tu-berlin.de/handle/11303/20313","pdf_url":null,"source":{"id":"https://openalex.org/S4406922277","display_name":"DepositOnce","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"acceptedVersion"},{"id":"doi:10.14279/depositonce-19111","is_oa":true,"landing_page_url":"https://doi.org/10.14279/depositonce-19111","pdf_url":null,"source":{"id":"https://openalex.org/S7407052985","display_name":"Technische Universit\u00e4t Berlin \u2013 Universit\u00e4tsbibliothek","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:depositonce.tu-berlin.de:11303/20313","is_oa":true,"landing_page_url":"https://depositonce.tu-berlin.de/handle/11303/20313","pdf_url":null,"source":{"id":"https://openalex.org/S4406922277","display_name":"DepositOnce","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"acceptedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Protein":[0],"structure":[1,31,172,217,239,310,398,427,485,502,575,633],"prediction":[2,32,47,311,486,503,518,576,613,634],"is":[3,63,86,104,180,192,221,251,405,421,620],"a":[4,34,193,252,308,406,418,500,524,570,643],"crucial":[5],"task":[6],"in":[7,80,130,186,281,322,463,547,605,611,653],"computational":[8],"biology":[9],"with":[10,92,101,335,431,447,523,589],"significant":[11,628],"implications":[12,629],"for":[13,22,170,519,573,615,630],"drug":[14],"development,":[15],"and":[16,27,299,302,368,401,466,480,521,536,549,597,668],"understanding":[17],"biological":[18],"processes.":[19],"Experimental":[20],"methods":[21,76,103,584],"protein":[23,30,171,178,216,228,257,269,297,309,410,468,501,574,632,664],"determination":[24],"are":[25,77,124],"expensive":[26],"time-consuming.":[28],"Computational":[29],"provides":[33,594],"feasible":[35],"alternative.":[36],"Although":[37],"end-to-end":[38],"deep-learning":[39,583],"methods,":[40],"like":[41],"Alphafold":[42],"2,":[43],"have":[44,154],"significantly":[45],"improved":[46],"quality":[48],"by":[49,233,510,585],"utilizing":[50],"co-evolutionary":[51,84,108,115,147,339,362,370,448,457,511,590,618],"information":[52,62,85,116,458,490,619],"from":[53,399,470,533],"multiple":[54,93,471,481],"sequence":[55,472,482],"alignments,":[56],"they":[57],"remain":[58],"limited":[59],"when":[60,118],"such":[61,88,662],"unavailable,":[64],"which":[65,128,191,317],"pertains":[66],"to":[67,112,143,157,188,206,211,290,349,377,396,423,459,542,658],"approximately":[68],"20%":[69],"of":[70,146,161,176,196,214,225,236,246,255,267,284,295,331,338,345,352,360,381,409,417,428,527,557,581,600,608,641,646],"all":[71],"metagenomic":[72],"proteins.":[73,97],"Furthermore,":[74],"these":[75,102],"less":[78],"effective":[79,598],"downstream":[81,631],"tasks":[82],"where":[83,617],"uninformative,":[87],"as":[89,663],"predicting":[90],"proteins":[91,430,522,562,616],"conformations":[94,224,291],"or":[95,622],"disordered":[96],"The":[98],"primary":[99],"challenge":[100],"their":[105,237],"over-reliance":[106],"on":[107],"information.":[109,148,340,363,449,512,591],"They":[110],"tend":[111],"over-exploit":[113],"the":[114,119,144,158,162,166,174,197,208,215,234,244,278,282,288,319,323,329,332,336,343,353,361,365,369,375,378,382,385,415,425,438,461,494,543,555,579,602,606,647],"even":[117],"Multiple":[120],"Sequence":[121],"Alignments":[122],"(MSAs)":[123],"not":[125,181],"informative":[126],"enough":[127],"results":[129],"wrong":[131],"predictions.":[132],"To":[133],"address":[134],"this":[135,201,247,262],"issue,":[136],"we":[137,203,276,388,442,451,454,497,553],"propose":[138,637],"adding":[139],"conformational":[140,152,163,198,209,279,320,333,354,538,603],"space":[141,169,175,283,334,344,355,465,539,607,649],"exploration":[142,330],"exploitation":[145,337],"Past":[149],"approaches":[150],"attempting":[151],"search":[153,210,280,289,321,376,462,508,588,604,654],"struggled":[155],"due":[156],"vast":[159],"size":[160],"space.":[164,199,325,383],"However,":[165],"actual":[167],"solution":[168,648],"prediction,":[173],"natural":[177,226,256,268,296],"structures,":[179],"very":[182],"large:":[183],"Proteins":[184],"cluster":[185],"1,000":[187],"10,000":[189],"folds,":[190],"minuscule":[194],"region":[195],"In":[200,384,437,493],"thesis,":[202],"explore":[204],"how":[205],"constrain":[207],"an":[212,394,595],"estimate":[213],"universe.":[218],"Our":[219,326,592,624],"insight":[220],"that":[222,243,261,356,403,414,453,477,505,515,577,638],"most":[223,429],"(globular)":[227],"structures":[229,270,298],"can":[230,455,655],"be":[231,656],"represented":[232],"arrangement":[235],"secondary":[238],"elements.":[240],"We":[241,306,341,412,474,513,636],"hypothesize":[242],"representation":[245,254,263,408,645],"arrangement,":[248],"called":[249,313],"topology,":[250,400],"latent":[253,407,644],"structures.":[258,305,411,551],"This":[259,530],"means":[260],"encodes":[264],"essential":[265,293],"features":[266,294],"while":[271],"discarding":[272],"unimportant":[273],"characteristics.":[274],"Thus,":[275],"conduct":[277],"topologies,":[285,609],"limiting":[286],"thereby":[287],"possessing":[292],"disregarding":[300],"physically":[301],"evolutionarily":[303],"improbable":[304],"develop":[307],"approach":[312,327,395,446,504,572,593,640],"Topology-Based":[314],"Search":[315],"(TBS),":[316],"conducts":[318],"topological":[324,366,391,464,507],"combines":[328],"sample":[342],"topologies":[346],"restricting":[347],"it":[348,652],"relevant":[350,379],"regions":[351,380],"also":[357,626],"satisfy":[358],"some":[359],"Both":[364],"prior":[367,371],"help":[372],"us":[373],"limit":[374],"first":[386],"chapter,":[387,496],"define":[389],"our":[390,445,558,567,639],"representation,":[392],"devise":[393],"predict":[397,467],"demonstrate":[402,452],"topology":[404,420,469,479],"show":[413,476],"knowledge":[416],"protein's":[419],"sufficient":[422],"determine":[424],"native":[426],"medium":[432],"accuracy":[433,614],"(TM-score":[434],"&gt;":[435],"0.5).":[436],"next":[439],"two":[440],"chapters,":[441],"investigate":[443],"combining":[444,478,586],"First,":[450],"use":[456],"guide":[460],"alignments.":[473],"then":[475,650],"alignments":[483],"improves":[484],"over":[487],"using":[488,651],"either":[489],"source":[491],"alone.":[492],"final":[495],"introduce":[498],"TBS,":[499],"employs":[506],"guided":[509],"reveal":[514],"TBS":[516],"enhances":[517],"beta-proteins":[520],"high":[525],"proportion":[526],"beta":[528],"residues.":[529],"improvement":[531],"stems":[532],"both":[534],"efficient":[535,596],"increased":[537],"sampling":[540],"compared":[541],"baseline":[544],"(trRosetta)":[545],"resulting":[546,610],"diverse":[548],"realistic":[550],"Last,":[552],"discuss":[554],"limitations":[556,580],"approach,":[559],"particularly":[560],"regarding":[561],"containing":[563],"alpha":[564],"helices.":[565],"Overall,":[566],"thesis":[568],"presents":[569],"novel":[571],"addresses":[578],"current":[582],"topology-based":[587],"way":[599],"conducting":[601],"better":[612],"unavailable":[621],"uninformative.":[623],"work":[625],"has":[627],"tasks.":[635],"learning":[642],"extended":[657],"other":[659],"protein-related":[660],"problems,":[661],"design,":[665],"protein-protein":[666],"interactions":[667],"protein-ligand":[669],"binding.":[670]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
