{"id":"https://openalex.org/W4404965619","doi":"https://doi.org/10.1186/s13321-024-00932-y","title":"Sort &amp; Slice: a simple and superior alternative to hash-based folding for extended-connectivity fingerprints","display_name":"Sort &amp; Slice: a simple and superior alternative to hash-based folding for extended-connectivity fingerprints","publication_year":2024,"publication_date":"2024-12-03","ids":{"openalex":"https://openalex.org/W4404965619","doi":"https://doi.org/10.1186/s13321-024-00932-y","pmid":"https://pubmed.ncbi.nlm.nih.gov/39627861"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-024-00932-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-024-00932-y","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-024-00932-y","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-024-00932-y","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049022555","display_name":"Markus Dablander","orcid":null},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Markus Dablander","raw_affiliation_strings":["Mathematical Institute, University of Oxford, Andrew Wiles Building, Radcliffe Observatory Quarter (550), Woodstock Road, Oxford, OX2 6GG, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mathematical Institute, University of Oxford, Andrew Wiles Building, Radcliffe Observatory Quarter (550), Woodstock Road, Oxford, OX2 6GG, UK","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082247372","display_name":"Thierry Hanser","orcid":"https://orcid.org/0000-0002-0392-6154"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thierry Hanser","raw_affiliation_strings":["Lhasa Limited, Granary Wharf House, 2 Canal Wharf, Leeds, LS11 5PS, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lhasa Limited, Granary Wharf House, 2 Canal Wharf, Leeds, LS11 5PS, UK","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034372799","display_name":"Renaud Lambiotte","orcid":"https://orcid.org/0000-0002-0583-4595"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Renaud Lambiotte","raw_affiliation_strings":["Mathematical Institute, University of Oxford, Andrew Wiles Building, Radcliffe Observatory Quarter (550), Woodstock Road, Oxford, OX2 6GG, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mathematical Institute, University of Oxford, Andrew Wiles Building, Radcliffe Observatory Quarter (550), Woodstock Road, Oxford, OX2 6GG, UK","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011245839","display_name":"Garrett M. Morris","orcid":"https://orcid.org/0000-0003-1731-8405"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Garrett M. Morris","raw_affiliation_strings":["Department of Statistics, University of Oxford, 24-29 St Giles', Oxford, OX1 3LB, UK. garrett.morris@stats.ox.ac.uk","Department of Statistics, University of Oxford, 24-29 St Giles', Oxford, OX1 3LB, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Statistics, University of Oxford, 24-29 St Giles', Oxford, OX1 3LB, UK. garrett.morris@stats.ox.ac.uk","institution_ids":["https://openalex.org/I40120149"]},{"raw_affiliation_string":"Department of Statistics, University of Oxford, 24-29 St Giles', Oxford, OX1 3LB, UK","institution_ids":["https://openalex.org/I40120149"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5011245839"],"corresponding_institution_ids":["https://openalex.org/I40120149"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":2.6193,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.91234325,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"16","issue":"1","first_page":"135","last_page":"135"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7775973677635193},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.7133187055587769},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.681335985660553},{"id":"https://openalex.org/keywords/folding","display_name":"Folding (DSP implementation)","score":0.5623175501823425},{"id":"https://openalex.org/keywords/sort","display_name":"sort","score":0.5090789198875427},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.48437198996543884},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4176328182220459},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4094497561454773},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3447956442832947},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34356558322906494},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1121819019317627},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.074974924325943}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7775973677635193},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.7133187055587769},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.681335985660553},{"id":"https://openalex.org/C2776545253","wikidata":"https://www.wikidata.org/wiki/Q5464292","display_name":"Folding (DSP implementation)","level":2,"score":0.5623175501823425},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.5090789198875427},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.48437198996543884},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4176328182220459},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4094497561454773},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3447956442832947},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34356558322906494},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1121819019317627},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.074974924325943},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1186/s13321-024-00932-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-024-00932-y","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-024-00932-y","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:39627861","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39627861","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:11616156","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/11616156","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11616156/pdf/13321_2024_Article_932.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Cheminform","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:cac831bacf7d485da76348f7818e1c20","is_oa":false,"landing_page_url":"https://doaj.org/article/cac831bacf7d485da76348f7818e1c20","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 16, Iss 1, Pp 1-20 (2024)","raw_type":"article"},{"id":"pmh:oai:ora.ox.ac.uk:uuid:c63dca6e-4205-4926-a988-0c999febc501","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306402636","display_name":"Oxford University Research Archive (ORA) (University of Oxford)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I40120149","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/I40120149"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Symplectic Elements","raw_type":"Journal article"}],"best_oa_location":{"id":"doi:10.1186/s13321-024-00932-y","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-024-00932-y","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-024-00932-y","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1812176928","display_name":null,"funder_award_id":"EP/V013068/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G3313373374","display_name":null,"funder_award_id":"EP/V03474X/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320320290","display_name":"University of Oxford","ror":"https://ror.org/052gg0110"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4404965619.pdf"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W1975147762","https://openalex.org/W1985747475","https://openalex.org/W1988037271","https://openalex.org/W1995875735","https://openalex.org/W2031648200","https://openalex.org/W2044834685","https://openalex.org/W2045673749","https://openalex.org/W2060531713","https://openalex.org/W2066532920","https://openalex.org/W2119512897","https://openalex.org/W2123676318","https://openalex.org/W2200017991","https://openalex.org/W2327127510","https://openalex.org/W2540970325","https://openalex.org/W2594183968","https://openalex.org/W2806547269","https://openalex.org/W2901476322","https://openalex.org/W2901732000","https://openalex.org/W2905012389","https://openalex.org/W2907492528","https://openalex.org/W2957050889","https://openalex.org/W2966357564","https://openalex.org/W2966797369","https://openalex.org/W2977768120","https://openalex.org/W2997997679","https://openalex.org/W3015572666","https://openalex.org/W3016297257","https://openalex.org/W3019246273","https://openalex.org/W3035302862","https://openalex.org/W3035965352","https://openalex.org/W3038823641","https://openalex.org/W3082081167","https://openalex.org/W3085821739","https://openalex.org/W3099878876","https://openalex.org/W3110901318","https://openalex.org/W3124172763","https://openalex.org/W4213151958","https://openalex.org/W4220670676","https://openalex.org/W4295312788","https://openalex.org/W4366188919","https://openalex.org/W4382334246","https://openalex.org/W4383216745","https://openalex.org/W6601955380","https://openalex.org/W6941255991"],"related_works":["https://openalex.org/W2953234277","https://openalex.org/W2626256601","https://openalex.org/W147410782","https://openalex.org/W2900413183","https://openalex.org/W4390975304","https://openalex.org/W3022252430","https://openalex.org/W4287804464","https://openalex.org/W3103989898","https://openalex.org/W3023219121","https://openalex.org/W2035244949"],"abstract_inverted_index":{"Extended-connectivity":[0],"fingerprints":[1,81,270],"(ECFPs)":[2],"are":[3,56,157],"a":[4,42,65,72,83,97,139,162,285],"ubiquitous":[5],"tool":[6],"in":[7,138],"current":[8],"cheminformatics":[9],"and":[10,14,96,114,145,180,187,232,274,278,288,299],"molecular":[11,20,192,256,306],"machine":[12,257],"learning,":[13],"one":[15],"of":[16,45,52,79,100,123,142,165,174,268,281,294],"the":[17,77,121,151,172,218,247,266,275,292],"most":[18,153],"prevalent":[19],"feature":[21],"extraction":[22],"techniques":[23],"used":[24,159],"for":[25,76,120,190,254,265,291],"chemical":[26],"prediction.":[27,194,308],"Atom":[28],"features":[29],"learned":[30],"by":[31,57],"graph":[32,46],"neural":[33],"networks":[34],"can":[35],"be":[36],"aggregated":[37],"to":[38,107,117,134,160,251],"compound-level":[39],"representations":[40],"using":[41,63],"large":[43],"spectrum":[44],"pooling":[47,88,122,293],"methods.":[48],"In":[49],"contrast,":[50],"sets":[51],"detected":[53],"ECFP":[54,124,131,233,295],"substructures":[55,132,155,296],"default":[58,248],"transformed":[59],"into":[60],"bit":[61],"vectors":[62],"only":[64],"simple":[66,287],"hash-based":[67,91,118,175,213,244,303],"folding":[68,119,214,245,304],"procedure.":[69],"We":[70,104,169,235],"introduce":[71],"general":[73,262],"mathematical":[74,263],"framework":[75,264],"vectorisation":[78,267],"structural":[80,269],"via":[82],"formal":[84],"operation":[85],"called":[86,271],"substructure":[87,94,272],"that":[89,238,297],"encompasses":[90],"folding,":[92,176],"algorithmic":[93],"selection,":[95],"wide":[98],"variety":[99],"other":[101,219],"potential":[102],"techniques.":[103],"go":[105],"on":[106],"describe":[108],"Sort":[109,126,177,203,239,282],"&":[110,127,178,204,240,283],"Slice,":[111,179,284],"an":[112],"easy-to-implement":[113],"bit-collision-free":[115,289],"alternative":[116],"substructures.":[125],"Slice":[128,205,241],"first":[129],"sorts":[130],"according":[133],"their":[135],"relative":[136],"prevalence":[137],"given":[140],"set":[141],"training":[143],"compounds":[144],"then":[146],"slices":[147],"away":[148],"all":[149],"but":[150],"L":[152],"frequent":[154],"which":[156],"subsequently":[158],"generate":[161],"binary":[163],"fingerprint":[164],"desired":[166],"length,":[167],"L.":[168],"computationally":[170],"compare":[171],"performance":[173],"two":[181],"advanced":[182],"supervised":[183,255],"substructure-selection":[184],"schemes":[185],"(filtering":[186],"mutual-information":[188],"maximisation)":[189],"ECFP-based":[191],"property":[193,307],"Our":[195],"results":[196],"indicate":[197],"that,":[198],"despite":[199],"its":[200],"technical":[201,276],"simplicity,":[202],"robustly":[206,298],"(and":[207],"at":[208,305],"times":[209],"substantially)":[210],"outperforms":[211,301],"traditional":[212],"as":[215,217,246],"well":[216],"investigated":[220],"substructure-pooling":[221,249],"methods":[222],"across":[223],"distinct":[224],"prediction":[225],"tasks,":[226],"data":[227],"splitting":[228],"techniques,":[229],"machine-learning":[230],"models":[231],"hyperparameters.":[234],"thus":[236],"recommend":[237],"canonically":[242],"replace":[243],"technique":[250],"vectorise":[252],"ECFPs":[253],"learning.":[258],"Scientific":[259],"contribution":[260],"A":[261],"pooling;":[273],"description":[277],"computational":[279],"evaluation":[280],"conceptually":[286],"method":[290],"markedly":[300],"classical":[302]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6}],"updated_date":"2026-05-30T09:04:40.226872","created_date":"2025-10-10T00:00:00"}
