{"id":"https://openalex.org/W4415618177","doi":"https://doi.org/10.1186/s13321-025-01098-x","title":"HTA - An open-source software for assigning head and tail positions\u00a0to\u00a0monomer SMILES in polymerization reactions","display_name":"HTA - An open-source software for assigning head and tail positions\u00a0to\u00a0monomer SMILES in polymerization reactions","publication_year":2025,"publication_date":"2025-10-28","ids":{"openalex":"https://openalex.org/W4415618177","doi":"https://doi.org/10.1186/s13321-025-01098-x","pmid":"https://pubmed.ncbi.nlm.nih.gov/41152926"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-025-01098-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-025-01098-x","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-025-01098-x","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-025-01098-x","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064374279","display_name":"Brenda de Souza Ferrari","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113516","display_name":"IBM Research - Brazil","ror":"https://ror.org/01fxqdx25","country_code":"BR","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210113516","https://openalex.org/I4210114115"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Brenda de Souza Ferrari","raw_affiliation_strings":["IBM Research, Avenida Rep\u00fablica do Chile, 330, Rio de Janeiro, RJ, 20031-170, Brazil"],"affiliations":[{"raw_affiliation_string":"IBM Research, Avenida Rep\u00fablica do Chile, 330, Rio de Janeiro, RJ, 20031-170, Brazil","institution_ids":["https://openalex.org/I4210113516"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006910532","display_name":"Ronaldo Giro","orcid":"https://orcid.org/0000-0003-2040-7564"},"institutions":[{"id":"https://openalex.org/I4210113516","display_name":"IBM Research - Brazil","ror":"https://ror.org/01fxqdx25","country_code":"BR","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210113516","https://openalex.org/I4210114115"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Ronaldo Giro","raw_affiliation_strings":["IBM Research, Rd J Fco Aguirre Proenca Km 9 SP101, Hortol\u00e2ndia, SP, 13186-900, Brazil. rgiro@br.ibm.com","IBM Research, Rd J Fco Aguirre Proenca Km 9 SP101, Hortol\u00e2ndia, SP, 13186-900, Brazil"],"affiliations":[{"raw_affiliation_string":"IBM Research, Rd J Fco Aguirre Proenca Km 9 SP101, Hortol\u00e2ndia, SP, 13186-900, Brazil. rgiro@br.ibm.com","institution_ids":["https://openalex.org/I4210113516"]},{"raw_affiliation_string":"IBM Research, Rd J Fco Aguirre Proenca Km 9 SP101, Hortol\u00e2ndia, SP, 13186-900, Brazil","institution_ids":["https://openalex.org/I4210113516"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001071143","display_name":"M. Steiner","orcid":"https://orcid.org/0000-0003-1528-9292"},"institutions":[{"id":"https://openalex.org/I4210113516","display_name":"IBM Research - Brazil","ror":"https://ror.org/01fxqdx25","country_code":"BR","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210113516","https://openalex.org/I4210114115"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Mathias B. Steiner","raw_affiliation_strings":["IBM Research, Avenida Rep\u00fablica do Chile, 330, Rio de Janeiro, RJ, 20031-170, Brazil. mathiast@br.ibm.com","IBM Research, Avenida Rep\u00fablica do Chile, 330, Rio de Janeiro, RJ, 20031-170, Brazil"],"affiliations":[{"raw_affiliation_string":"IBM Research, Avenida Rep\u00fablica do Chile, 330, Rio de Janeiro, RJ, 20031-170, Brazil. mathiast@br.ibm.com","institution_ids":["https://openalex.org/I4210113516"]},{"raw_affiliation_string":"IBM Research, Avenida Rep\u00fablica do Chile, 330, Rio de Janeiro, RJ, 20031-170, Brazil","institution_ids":["https://openalex.org/I4210113516"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5001071143","https://openalex.org/A5006910532"],"corresponding_institution_ids":["https://openalex.org/I4210113516"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25385953,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"17","issue":"1","first_page":"162","last_page":"162"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9185000061988831,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9185000061988831,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10405","display_name":"Advanced Polymer Synthesis and Characterization","score":0.029200000688433647,"subfield":{"id":"https://openalex.org/subfields/1605","display_name":"Organic Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.011699999682605267,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monomer","display_name":"Monomer","score":0.8033000230789185},{"id":"https://openalex.org/keywords/polymerization","display_name":"Polymerization","score":0.588100016117096},{"id":"https://openalex.org/keywords/polymer","display_name":"Polymer","score":0.5483999848365784},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.48170000314712524},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.44909998774528503},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4490000009536743},{"id":"https://openalex.org/keywords/head","display_name":"Head (geology)","score":0.41179999709129333}],"concepts":[{"id":"https://openalex.org/C166940927","wikidata":"https://www.wikidata.org/wiki/Q178827","display_name":"Monomer","level":3,"score":0.8033000230789185},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6448000073432922},{"id":"https://openalex.org/C44228677","wikidata":"https://www.wikidata.org/wiki/Q181898","display_name":"Polymerization","level":3,"score":0.588100016117096},{"id":"https://openalex.org/C521977710","wikidata":"https://www.wikidata.org/wiki/Q81163","display_name":"Polymer","level":2,"score":0.5483999848365784},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.48170000314712524},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.44909998774528503},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4490000009536743},{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.41179999709129333},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.3456000089645386},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.3240000009536743},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.3093000054359436},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.3077000081539154},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.2985999882221222},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2583000063896179},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.2547999918460846},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2533999979496002},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.25}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s13321-025-01098-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-025-01098-x","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-025-01098-x","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:41152926","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41152926","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:bb2941d653c94436801b69edc5803edd","is_oa":true,"landing_page_url":"https://doaj.org/article/bb2941d653c94436801b69edc5803edd","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 17, Iss 1, Pp 1-14 (2025)","raw_type":"article"},{"id":"pmh:oai:europepmc.org:11378249","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12570827","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/s13321-025-01098-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-025-01098-x","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-025-01098-x","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415618177.pdf","grobid_xml":"https://content.openalex.org/works/W4415618177.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W1508604947","https://openalex.org/W1650728208","https://openalex.org/W1968392598","https://openalex.org/W1975147762","https://openalex.org/W2030971064","https://openalex.org/W2034354062","https://openalex.org/W2038702914","https://openalex.org/W2075728943","https://openalex.org/W2078409902","https://openalex.org/W2083224653","https://openalex.org/W2095700073","https://openalex.org/W2114326383","https://openalex.org/W2123087792","https://openalex.org/W2139078293","https://openalex.org/W2168299072","https://openalex.org/W2169678694","https://openalex.org/W2172216479","https://openalex.org/W2467400748","https://openalex.org/W2490237363","https://openalex.org/W2756519801","https://openalex.org/W2909063104","https://openalex.org/W2932060047","https://openalex.org/W2969849020","https://openalex.org/W2973074478","https://openalex.org/W3007178273","https://openalex.org/W3009042623","https://openalex.org/W3016076495","https://openalex.org/W3033186646","https://openalex.org/W3034563742","https://openalex.org/W3096386116","https://openalex.org/W3114925371","https://openalex.org/W3129864870","https://openalex.org/W3216501087","https://openalex.org/W4292361108","https://openalex.org/W4299576998","https://openalex.org/W4382786986","https://openalex.org/W4392737836","https://openalex.org/W4399322969","https://openalex.org/W6969117866"],"related_works":[],"abstract_inverted_index":{"Artificial":[0],"Intelligence":[1],"(AI)":[2],"techniques":[3],"are":[4,19,110],"transforming":[5],"the":[6,25,32,42,48,56,64,75,78,84,88,102,128,136,150,156,209,216],"computational":[7],"discovery":[8],"and":[9,44,80,107,120,170,192],"design":[10],"of":[11,24,28,50,58,66,77,91,105,130,139,159,166,183],"polymers.":[12],"The":[13,168,185,199],"key":[14],"enablers":[15],"for":[16,101,117,190,214],"polymer":[17,92,148,157],"informatics":[18],"machine-readable":[20],"molecular":[21],"string":[22],"representations":[23],"building":[26],"blocks":[27],"a":[29,61,115,142],"polymer,":[30],"i.e.,":[31],"monomers.":[33],"In":[34,141],"monomer":[35,85,124,161,178],"strings,":[36],"such":[37],"as":[38],"SMILES,":[39,162,179],"symbols":[40],"at":[41,194],"head":[43,79,106,119,169],"tail":[45,81,108,121,171],"atoms":[46,109,122,172],"indicate":[47],"locations":[49],"bond":[51],"formation":[52],"during":[53],"polymerization.":[54],"Since":[55],"linking":[57],"monomers":[59,213],"determines":[60],"polymer's":[62],"properties,":[63],"performance":[65],"AI":[67],"prediction":[68],"models":[69],"will,":[70],"ultimately,":[71],"be":[72],"limited":[73],"by":[74,126,207],"accuracy":[76,165,182],"assignments":[82],"in":[83,95,123,212,219],"SMILES.":[86],"Considering":[87],"large":[89],"number":[90],"precursors":[93],"available":[94,189],"chemical":[96],"data":[97,144,205],"bases,":[98],"reliable":[99],"methods":[100],"automated":[103],"assignment":[104],"needed.":[111],"Here,":[112],"we":[113],"report":[114],"method":[116],"assigning":[118],"SMILES":[125],"analyzing":[127],"reactivity":[129],"their":[131],"functional":[132],"groups":[133],"based":[134],"on":[135],"atomic":[137],"index":[138],"nucleophilicity.":[140],"reference":[143],"set":[145],"containing":[146],"206":[147],"precursors,":[149],"HeadTailAssign":[151],"(HTA)":[152],"algorithm":[153,200],"correctly":[154,174],"predicted":[155],"class":[158],"204":[160],"achieving":[163],"an":[164,181],"99%.":[167],"were":[173],"assigned":[175],"to":[176,204],"187":[177],"representing":[180],"91%.":[184],"HTA":[186],"code":[187],"is":[188],"validation":[191],"reuse":[193],"https://github.com/IBM/HeadTailAssign":[195],".":[196],"SCIENTIFIC":[197],"CONTRIBUTION:":[198],"was":[201],"successfully":[202],"applied":[203],"pre-processing":[206],"tagging":[208],"linkage":[210],"bonds":[211],"defining":[215],"repeat":[217],"units":[218],"polymerization":[220],"reactions.":[221]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-28T00:00:00"}
