{"id":"https://openalex.org/W4399714295","doi":"https://doi.org/10.1186/s13321-024-00868-3","title":"PubChem synonym filtering process using crowdsourcing","display_name":"PubChem synonym filtering process using crowdsourcing","publication_year":2024,"publication_date":"2024-06-16","ids":{"openalex":"https://openalex.org/W4399714295","doi":"https://doi.org/10.1186/s13321-024-00868-3","pmid":"https://pubmed.ncbi.nlm.nih.gov/38880887"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-024-00868-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-024-00868-3","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-024-00868-3","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-024-00868-3","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100430147","display_name":"Sunghwan Kim","orcid":"https://orcid.org/0000-0001-9828-2074"},"institutions":[{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]},{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sunghwan Kim","raw_affiliation_strings":["National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA"],"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090242045","display_name":"Bo Yu","orcid":"https://orcid.org/0000-0003-3952-8921"},"institutions":[{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]},{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bo Yu","raw_affiliation_strings":["National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA"],"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100613839","display_name":"Qingliang Li","orcid":"https://orcid.org/0000-0002-6453-236X"},"institutions":[{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]},{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qingliang Li","raw_affiliation_strings":["National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA"],"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015494748","display_name":"Evan Bolton","orcid":"https://orcid.org/0000-0002-5959-6190"},"institutions":[{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]},{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Evan E. Bolton","raw_affiliation_strings":["National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA. bolton@ncbi.nlm.nih.gov","National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA"],"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA. bolton@ncbi.nlm.nih.gov","institution_ids":[]},{"raw_affiliation_string":"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, Bethesda, MD, 20894, USA","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100430147"],"corresponding_institution_ids":["https://openalex.org/I1299303238","https://openalex.org/I4210109390"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":0.7154,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.6938399,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"16","issue":"1","first_page":"69","last_page":"69"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13297","display_name":"History and advancements in chemistry","score":0.9695000052452087,"subfield":{"id":"https://openalex.org/subfields/1606","display_name":"Physical and Theoretical Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pubchem","display_name":"PubChem","score":0.9655135869979858},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6913522481918335},{"id":"https://openalex.org/keywords/synonym","display_name":"Synonym (taxonomy)","score":0.6201144456863403},{"id":"https://openalex.org/keywords/voting","display_name":"Voting","score":0.46513575315475464},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.425595223903656},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3298102617263794},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31849902868270874},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.1157488226890564},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.11015132069587708}],"concepts":[{"id":"https://openalex.org/C158180186","wikidata":"https://www.wikidata.org/wiki/Q278487","display_name":"PubChem","level":2,"score":0.9655135869979858},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6913522481918335},{"id":"https://openalex.org/C173483453","wikidata":"https://www.wikidata.org/wiki/Q1040689","display_name":"Synonym (taxonomy)","level":3,"score":0.6201144456863403},{"id":"https://openalex.org/C520049643","wikidata":"https://www.wikidata.org/wiki/Q189760","display_name":"Voting","level":3,"score":0.46513575315475464},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.425595223903656},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3298102617263794},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31849902868270874},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.1157488226890564},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.11015132069587708},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C157369684","wikidata":"https://www.wikidata.org/wiki/Q34740","display_name":"Genus","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s13321-024-00868-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-024-00868-3","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-024-00868-3","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:38880887","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38880887","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:11181558","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/11181558","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11181558/pdf/13321_2024_Article_868.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Cheminform","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:7114178bfe8a4ee48adb958256877300","is_oa":true,"landing_page_url":"https://doaj.org/article/7114178bfe8a4ee48adb958256877300","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 16, Iss 1, Pp 1-22 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s13321-024-00868-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-024-00868-3","pdf_url":"https://jcheminf.biomedcentral.com/counter/pdf/10.1186/s13321-024-00868-3","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337372","display_name":"U.S. National Library of Medicine","ror":"https://ror.org/0060t0j89"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399714295.pdf"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W1509844421","https://openalex.org/W1577231857","https://openalex.org/W1969605785","https://openalex.org/W1984678368","https://openalex.org/W1995008770","https://openalex.org/W2002400529","https://openalex.org/W2020556160","https://openalex.org/W2023839852","https://openalex.org/W2041610798","https://openalex.org/W2045873232","https://openalex.org/W2057069496","https://openalex.org/W2084727433","https://openalex.org/W2095663170","https://openalex.org/W2101798695","https://openalex.org/W2104775949","https://openalex.org/W2107919969","https://openalex.org/W2120211169","https://openalex.org/W2122709242","https://openalex.org/W2132520326","https://openalex.org/W2137490801","https://openalex.org/W2152290993","https://openalex.org/W2169997311","https://openalex.org/W2177317049","https://openalex.org/W2188035077","https://openalex.org/W2317066137","https://openalex.org/W2409470256","https://openalex.org/W2479356696","https://openalex.org/W2560264578","https://openalex.org/W2567393658","https://openalex.org/W2801555904","https://openalex.org/W2887044684","https://openalex.org/W2887459817","https://openalex.org/W2899070097","https://openalex.org/W2916534270","https://openalex.org/W2951792120","https://openalex.org/W2953214390","https://openalex.org/W2953336659","https://openalex.org/W2967655665","https://openalex.org/W2999177197","https://openalex.org/W3013445709","https://openalex.org/W3048090444","https://openalex.org/W3097145107","https://openalex.org/W3191852652","https://openalex.org/W4214554708","https://openalex.org/W4246526660","https://openalex.org/W4307468223","https://openalex.org/W4309506674","https://openalex.org/W4387804083","https://openalex.org/W4388928506","https://openalex.org/W6930369291"],"related_works":["https://openalex.org/W2104366126","https://openalex.org/W2547231910","https://openalex.org/W2235675062","https://openalex.org/W2177317049","https://openalex.org/W4293208996","https://openalex.org/W2169546341","https://openalex.org/W2127553917","https://openalex.org/W1505362105","https://openalex.org/W2096560421","https://openalex.org/W4292259388"],"abstract_inverted_index":{"PubChem":[0,25,42,135],"(":[1],"https://pubchem.ncbi.nlm.nih.gov":[2],")":[3],"is":[4,30,277,312,328],"a":[5,39,101,105,219,230,282,294,310],"public":[6],"chemical":[7,16,28,49,102,107,188,199,232,245,341],"information":[8],"resource":[9],"containing":[10],"more":[11],"than":[12],"100":[13],"million":[14],"unique":[15],"structures.":[17],"One":[18],"of":[19,145,180,187,198,252],"the":[20,131,143,152,225,237,243,250],"most":[21,226,238],"requested":[22],"tasks":[23],"in":[24,124,130,151,224,259,284,336,339],"and":[26,72,92,121,160,196,201],"other":[27],"databases":[29,342],"to":[31,56,88,98,104,162,175,229,279,290,299],"search":[32],"chemicals":[33,71],"by":[34,46,53,314],"name":[35,103],"(also":[36],"commonly":[37],"called":[38],"\"chemical":[40],"synonym\").":[41],"performs":[43],"this":[44,253,325],"task":[45],"looking":[47],"up":[48,265],"synonym-structure":[50,125,239,266],"associations":[51,85,126,240,267,286,303,338],"provided":[52,313],"individual":[54],"depositors":[55],"PubChem.":[57,344],"In":[58],"addition,":[59],"these":[60,82],"synonyms":[61,227],"are":[62,86,322],"used":[63],"for":[64,281,307,333],"many":[65,320],"purposes,":[66],"including":[67],"creating":[68],"links":[69],"between":[70,93],"PubMed":[73],"articles":[74],"(using":[75],"Medical":[76],"Subject":[77],"Headings":[78],"(MeSH)":[79],"terms).":[80],"However,":[81,324],"depositor-provided":[83],"name-structure":[84,285,302,337],"subject":[87],"substantial":[89],"discrepancies":[90,123,165],"within":[91],"depositors,":[94],"making":[95],"it":[96,296],"difficult":[97],"unambiguously":[99],"map":[100],"specific":[106],"structure.":[108],"The":[109,134,178],"present":[110],"paper":[111],"describes":[112],"PubChem's":[113,260],"crowdsourcing-based":[114],"synonym":[115,136,311],"filtering":[116,137,261,275,326],"strategy,":[117],"which":[118,149,190],"resolves":[119],"inter-":[120],"intra-depositor":[122,164],"as":[127,129,234,236,268,270],"well":[128,235,269],"chemical-MeSH":[132,271],"associations.":[133,272],"process":[138,262,276,327],"was":[139,182,257],"developed":[140],"based":[141],"on":[142,249],"analysis":[144],"four":[146,207],"crowd-voting":[147],"strategies,":[148],"differ":[150],"consistency":[153,221],"threshold":[154],"value":[155],"employed":[156],"(60%":[157],"vs":[158],"70%)":[159],"how":[161],"resolve":[163],"(a":[166],"single":[167,231],"vote":[168,215],"vs.":[169],"multiple":[170],"votes":[171],"per":[172,216],"depositor)":[173],"prior":[174],"inter-depositor":[176],"crowd-voting.":[177],"agreement":[179],"voting":[181],"determined":[183],"at":[184,242],"six":[185,244],"levels":[186],"equivalency,":[189],"considers":[191],"varying":[192],"isotopic":[193],"composition,":[194],"stereochemistry,":[195],"connectivity":[197],"structures":[200],"their":[202,291],"primary":[203],"components.":[204],"While":[205],"all":[206],"strategies":[208],"showed":[209],"comparable":[210],"results,":[211],"Strategy":[212,255],"I":[213,256],"(one":[214],"depositor":[217,317],"with":[218],"60%":[220],"threshold)":[222],"resulted":[223],"assigned":[228],"structure":[233],"disambiguated":[241],"equivalency":[246],"contexts.":[247],"Based":[248],"results":[251],"study,":[254],"implemented":[258],"that":[263],"cleans":[264],"This":[273],"consistency-based":[274],"designed":[278],"look":[280],"consensus":[283],"but":[287],"cannot":[288],"attest":[289],"correctness.":[292],"As":[293],"result,":[295],"can":[297],"fail":[298],"recognize":[300],"correct":[301],"(or":[304],"incorrect":[305],"ones),":[306],"example,":[308],"when":[309,319],"only":[315],"one":[316],"or":[318],"contributors":[321],"incorrect.":[323],"an":[329],"important":[330],"starting":[331],"point":[332],"quality":[334],"control":[335],"large":[340],"like":[343]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
