{"id":"https://openalex.org/W4321616023","doi":"https://doi.org/10.1186/s13321-023-00693-0","title":"Reconstruction of lossless molecular representations from fingerprints","display_name":"Reconstruction of lossless molecular representations from fingerprints","publication_year":2023,"publication_date":"2023-02-23","ids":{"openalex":"https://openalex.org/W4321616023","doi":"https://doi.org/10.1186/s13321-023-00693-0","pmid":"https://pubmed.ncbi.nlm.nih.gov/36823647"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-023-00693-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00693-0","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00693-0","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00693-0","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021698978","display_name":"Umit Volkan Ucak","orcid":"https://orcid.org/0000-0002-9088-0915"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Umit V. Ucak","raw_affiliation_strings":["Research Institute of Pharmaceutical Science, College of Pharmacy, Seoul National University, 1 Gwanak-ro, Gwanak-gu, Seoul, 08826, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Research Institute of Pharmaceutical Science, College of Pharmacy, Seoul National University, 1 Gwanak-ro, Gwanak-gu, Seoul, 08826, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070656627","display_name":"Islambek Ashyrmamatov","orcid":"https://orcid.org/0000-0001-6704-4233"},"institutions":[{"id":"https://openalex.org/I165507594","display_name":"Kangwon National University","ror":"https://ror.org/01mh5ph17","country_code":"KR","type":"education","lineage":["https://openalex.org/I165507594"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Islambek Ashyrmamatov","raw_affiliation_strings":["Department of Chemistry, Kangwon National University, Chuncheon, 24341, Republic of Korea","Department of Chemistry, Kangwon National University, Chuncheon,  24341, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Chemistry, Kangwon National University, Chuncheon, 24341, Republic of Korea","institution_ids":["https://openalex.org/I165507594"]},{"raw_affiliation_string":"Department of Chemistry, Kangwon National University, Chuncheon,  24341, Republic of Korea","institution_ids":["https://openalex.org/I165507594"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066026979","display_name":"Juyong Lee","orcid":"https://orcid.org/0000-0003-1174-4358"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Juyong Lee","raw_affiliation_strings":["Molecular Medicine and Biopharmaceutical Sciences, Graduate School of Convergence Science and Technology, Seoul National University, 1 Gwanak-ro, Gwanak-gu, Seoul, 08826, Republic of Korea. nicole23@snu.ac.kr","Research Institute of Pharmaceutical Science, College of Pharmacy, Seoul National University, 1 Gwanak-ro, Gwanak-gu, Seoul, 08826, Republic of Korea. nicole23@snu.ac.kr","Molecular Medicine and Biopharmaceutical Sciences, Graduate School of Convergence Science and Technology, Seoul National University, 1 Gwanak-ro, Gwanak-gu, Seoul,  08826, Republic of Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Molecular Medicine and Biopharmaceutical Sciences, Graduate School of Convergence Science and Technology, Seoul National University, 1 Gwanak-ro, Gwanak-gu, Seoul, 08826, Republic of Korea. nicole23@snu.ac.kr","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Research Institute of Pharmaceutical Science, College of Pharmacy, Seoul National University, 1 Gwanak-ro, Gwanak-gu, Seoul, 08826, Republic of Korea. nicole23@snu.ac.kr","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Molecular Medicine and Biopharmaceutical Sciences, Graduate School of Convergence Science and Technology, Seoul National University, 1 Gwanak-ro, Gwanak-gu, Seoul,  08826, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5066026979"],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":5.2468,"has_fulltext":true,"cited_by_count":27,"citation_normalized_percentile":{"value":0.96288515,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"15","issue":"1","first_page":"26","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13192","display_name":"Forensic Fingerprint Detection Methods","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.711452066898346},{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.6339856386184692},{"id":"https://openalex.org/keywords/fingerprint","display_name":"Fingerprint (computing)","score":0.4598250687122345},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.41837942600250244},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.40678879618644714},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.398580402135849},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.300650954246521},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.10594034194946289}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.711452066898346},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.6339856386184692},{"id":"https://openalex.org/C2777826928","wikidata":"https://www.wikidata.org/wiki/Q3745713","display_name":"Fingerprint (computing)","level":2,"score":0.4598250687122345},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.41837942600250244},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40678879618644714},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.398580402135849},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.300650954246521},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.10594034194946289}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1186/s13321-023-00693-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00693-0","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00693-0","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:36823647","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/36823647","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:9948316","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/9948316","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC9948316/pdf/13321_2023_Article_693.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Cheminform","raw_type":"Text"},{"id":"pmh:oai:s-space.snu.ac.kr:10371/192364","is_oa":true,"landing_page_url":"https://hdl.handle.net/10371/192364","pdf_url":"https://s-space.snu.ac.kr/bitstream/10371/192364/1/13321_2023_Article_693.pdf","source":{"id":"https://openalex.org/S4306401345","display_name":"Seoul National University Open Repository (Seoul National University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139264467","host_organization_name":"Seoul National University","host_organization_lineage":["https://openalex.org/I139264467"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},{"id":"pmh:oai:doaj.org/article:8075303bb82e40fdbaaa4e703d96887a","is_oa":true,"landing_page_url":"https://doaj.org/article/8075303bb82e40fdbaaa4e703d96887a","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 15, Iss 1, Pp 1-11 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s13321-023-00693-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-023-00693-0","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-023-00693-0","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.75}],"awards":[{"id":"https://openalex.org/G3482102824","display_name":null,"funder_award_id":"2022M3E5F3081268","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G35272312","display_name":null,"funder_award_id":"NRF-2022M3E5F3081268","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G3794639614","display_name":null,"funder_award_id":"2022R1C1C1005080","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G4806438613","display_name":null,"funder_award_id":"KEITI:2020002960002","funder_id":"https://openalex.org/F4320334877","funder_display_name":"Korea Environmental Industry and Technology Institute"},{"id":"https://openalex.org/G6035103128","display_name":null,"funder_award_id":"2019M3E5D4066898","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G6380830294","display_name":null,"funder_award_id":"NRF-2022M3E5F3081268","funder_id":"https://openalex.org/F4320320671","funder_display_name":"National Research Foundation"},{"id":"https://openalex.org/G7099795083","display_name":null,"funder_award_id":"2019M3E5D4066898","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G765297136","display_name":null,"funder_award_id":"KEITI:2020002960002","funder_id":"https://openalex.org/F4320322007","funder_display_name":"Ministry of Environment"},{"id":"https://openalex.org/G7841737093","display_name":null,"funder_award_id":"NTIS:1485017120","funder_id":"https://openalex.org/F4320322007","funder_display_name":"Ministry of Environment"},{"id":"https://openalex.org/G8104226562","display_name":null,"funder_award_id":"2020002960002","funder_id":"https://openalex.org/F4320322007","funder_display_name":"Ministry of Environment"},{"id":"https://openalex.org/G8855870903","display_name":null,"funder_award_id":"NRF-2019M3E5D4066898","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G8877558626","display_name":null,"funder_award_id":"NRF-2020M3A9G7103933","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G8959955562","display_name":null,"funder_award_id":"2020M3A9G7103933","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320322007","display_name":"Ministry of Environment","ror":"https://ror.org/04xmt0833"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320322724","display_name":"Ministry of Education, India","ror":"https://ror.org/048xjjh50"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320334877","display_name":"Korea Environmental Industry and Technology Institute","ror":"https://ror.org/022r1q746"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4321616023.pdf","grobid_xml":"https://content.openalex.org/works/W4321616023.grobid-xml"},"referenced_works_count":52,"referenced_works":["https://openalex.org/W1601495365","https://openalex.org/W1975147762","https://openalex.org/W1988037271","https://openalex.org/W2016979469","https://openalex.org/W2021748110","https://openalex.org/W2038702914","https://openalex.org/W2044834685","https://openalex.org/W2046376233","https://openalex.org/W2049654081","https://openalex.org/W2064440950","https://openalex.org/W2119289922","https://openalex.org/W2170973067","https://openalex.org/W2172216479","https://openalex.org/W2200017991","https://openalex.org/W2405035126","https://openalex.org/W2529996553","https://openalex.org/W2558999090","https://openalex.org/W2621742623","https://openalex.org/W2736137960","https://openalex.org/W2900694120","https://openalex.org/W2914635984","https://openalex.org/W2916877561","https://openalex.org/W2934842096","https://openalex.org/W2947423323","https://openalex.org/W2953128081","https://openalex.org/W2963609389","https://openalex.org/W2972608805","https://openalex.org/W2973074478","https://openalex.org/W2994678679","https://openalex.org/W2999373911","https://openalex.org/W3004633195","https://openalex.org/W3009202547","https://openalex.org/W3009321976","https://openalex.org/W3045928028","https://openalex.org/W3085821739","https://openalex.org/W3088265803","https://openalex.org/W3090513030","https://openalex.org/W3094771832","https://openalex.org/W3098269892","https://openalex.org/W3103092523","https://openalex.org/W3106761016","https://openalex.org/W3119022334","https://openalex.org/W3120024000","https://openalex.org/W3129831491","https://openalex.org/W3164193774","https://openalex.org/W3195604886","https://openalex.org/W4220670676","https://openalex.org/W4229590462","https://openalex.org/W4240563808","https://openalex.org/W4288044862","https://openalex.org/W6602480875","https://openalex.org/W6609581451"],"related_works":["https://openalex.org/W3106969033","https://openalex.org/W2186939576","https://openalex.org/W2357988910","https://openalex.org/W2948148442","https://openalex.org/W2377158164","https://openalex.org/W2187600494","https://openalex.org/W2114030128","https://openalex.org/W2135707701","https://openalex.org/W2095678457","https://openalex.org/W2352493357"],"abstract_inverted_index":{"The":[0],"simplified":[1],"molecular-input":[2],"line-entry":[3],"system":[4],"(SMILES)":[5],"is":[6,77,100],"the":[7,24,36,81,92,132,149],"most":[8],"prevalent":[9],"molecular":[10,42],"representation":[11],"used":[12,62,118],"in":[13,142],"AI-based":[14],"chemical":[15,69,124],"applications.":[16,75,126],"However,":[17],"there":[18],"are":[19,59],"innate":[20],"limitations":[21],"associated":[22],"with":[23,88],"internal":[25],"structure":[26],"of":[27,40,55,123,135,151],"SMILES":[28,45],"representations.":[29],"In":[30],"this":[31,33,128],"context,":[32],"study":[34,130],"exploits":[35],"resolution":[37],"and":[38,46,61,70,113,121,159],"robustness":[39],"unique":[41],"representations,":[43],"i.e.,":[44],"SELFIES":[47],"(SELF-referencIng":[48],"Embedded":[49],"strings),":[50],"reconstructed":[51],"from":[52],"a":[53],"set":[54],"structural":[56,105,136],"fingerprints,":[57,106],"which":[58],"proposed":[60],"herein":[63],"as":[64,119],"vital":[65],"representational":[66],"tools":[67],"for":[68,157],"natural":[71],"language":[72],"processing":[73],"(NLP)":[74],"This":[76],"achieved":[78],"by":[79],"restoring":[80],"connectivity":[82],"information":[83],"lost":[84],"during":[85],"fingerprint":[86],"transformation":[87],"high":[89],"accuracy.":[90],"Notably,":[91],"results":[93],"reveal":[94],"that":[95,138],"seemingly":[96],"irreversible":[97],"molecule-to-fingerprint":[98],"conversion":[99],"feasible.":[101],"More":[102],"specifically,":[103],"four":[104],"extended":[107],"connectivity,":[108],"topological":[109],"torsion,":[110],"atom":[111],"pairs,":[112],"atomic":[114],"environments":[115],"can":[116],"be":[117],"inputs":[120],"outputs":[122],"NLP":[125,143],"Therefore,":[127],"comprehensive":[129],"addresses":[131],"major":[133],"limitation":[134],"fingerprints":[137],"precludes":[139],"their":[140],"use":[141],"models.":[144],"Our":[145],"findings":[146],"will":[147],"facilitate":[148],"development":[150],"text-":[152],"or":[153],"fingerprint-based":[154],"chemoinformatic":[155],"models":[156],"generative":[158],"translational":[160],"tasks.":[161]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2025-10-10T00:00:00"}
