{"id":"https://openalex.org/W3137474978","doi":"https://doi.org/10.1109/bigdata50022.2020.9377978","title":"A Proficient Spelling Analysis Method Applied to Herbal and Dietary Supplement Discovery in a Large Clinical Corpus","display_name":"A Proficient Spelling Analysis Method Applied to Herbal and Dietary Supplement Discovery in a Large Clinical Corpus","publication_year":2020,"publication_date":"2020-12-10","ids":{"openalex":"https://openalex.org/W3137474978","doi":"https://doi.org/10.1109/bigdata50022.2020.9377978","mag":"3137474978"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata50022.2020.9377978","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9377978","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034770262","display_name":"T. Elizabeth Workman","orcid":"https://orcid.org/0000-0002-2273-078X"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"T. Elizabeth Workman","raw_affiliation_strings":["Biomeidcal Informatics Center, The George Washington University, Washington, D.C., U.S.A"],"affiliations":[{"raw_affiliation_string":"Biomeidcal Informatics Center, The George Washington University, Washington, D.C., U.S.A","institution_ids":["https://openalex.org/I193531525"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084025490","display_name":"Yijun Shao","orcid":"https://orcid.org/0000-0001-6419-7963"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yijun Shao","raw_affiliation_strings":["Biomeidcal Informatics Center, The George Washington University, Washington, D.C., U.S.A"],"affiliations":[{"raw_affiliation_string":"Biomeidcal Informatics Center, The George Washington University, Washington, D.C., U.S.A","institution_ids":["https://openalex.org/I193531525"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007564242","display_name":"Guy Divita","orcid":null},"institutions":[{"id":"https://openalex.org/I4210155647","display_name":"National Institutes of Health Clinical Center","ror":"https://ror.org/04vfsmv21","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I4210155647"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guy Divita","raw_affiliation_strings":["Rehabilitation Medicine Dept., Clinical Center National Institutes of Health, Bethesda, MD, U.S.A"],"affiliations":[{"raw_affiliation_string":"Rehabilitation Medicine Dept., Clinical Center National Institutes of Health, Bethesda, MD, U.S.A","institution_ids":["https://openalex.org/I4210155647"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058747294","display_name":"Qing Zeng\u2010Treitler","orcid":"https://orcid.org/0000-0002-8353-7473"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qing Zeng-Treitler","raw_affiliation_strings":["Biomeidcal Informatics Center, The George Washington University, Washington, D.C., U.S.A"],"affiliations":[{"raw_affiliation_string":"Biomeidcal Informatics Center, The George Washington University, Washington, D.C., U.S.A","institution_ids":["https://openalex.org/I193531525"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5034770262"],"corresponding_institution_ids":["https://openalex.org/I193531525"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14000887,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"34","issue":null,"first_page":"1004","last_page":"1010"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.972100019454956,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.972100019454956,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.935699999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spelling","display_name":"Spelling","score":0.970757246017456},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6936298608779907},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6817720532417297},{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.6563279628753662},{"id":"https://openalex.org/keywords/medical-prescription","display_name":"Medical prescription","score":0.5807614326477051},{"id":"https://openalex.org/keywords/text-messaging","display_name":"Text messaging","score":0.5736709237098694},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5665139555931091},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36038219928741455},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.24400466680526733},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.20577675104141235},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.17588642239570618}],"concepts":[{"id":"https://openalex.org/C2777801307","wikidata":"https://www.wikidata.org/wiki/Q2088390","display_name":"Spelling","level":2,"score":0.970757246017456},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6936298608779907},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6817720532417297},{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.6563279628753662},{"id":"https://openalex.org/C2426938","wikidata":"https://www.wikidata.org/wiki/Q3355478","display_name":"Medical prescription","level":2,"score":0.5807614326477051},{"id":"https://openalex.org/C3018949938","wikidata":"https://www.wikidata.org/wiki/Q17166101","display_name":"Text messaging","level":2,"score":0.5736709237098694},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5665139555931091},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36038219928741455},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.24400466680526733},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.20577675104141235},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.17588642239570618},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.0},{"id":"https://openalex.org/C98274493","wikidata":"https://www.wikidata.org/wiki/Q128406","display_name":"Pharmacology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata50022.2020.9377978","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9377978","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8100000023841858}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W90755025","https://openalex.org/W168564468","https://openalex.org/W1138636859","https://openalex.org/W1536985005","https://openalex.org/W1601247733","https://openalex.org/W1647671624","https://openalex.org/W1666538313","https://openalex.org/W1845214076","https://openalex.org/W1975879668","https://openalex.org/W1984291951","https://openalex.org/W2013413947","https://openalex.org/W2096797897","https://openalex.org/W2126374659","https://openalex.org/W2128279348","https://openalex.org/W2160670372","https://openalex.org/W2166975295","https://openalex.org/W2463713401","https://openalex.org/W2531708927","https://openalex.org/W2615910808","https://openalex.org/W2913213577","https://openalex.org/W6606409850","https://openalex.org/W6606906144","https://openalex.org/W6627191510","https://openalex.org/W6632020017","https://openalex.org/W6636090866","https://openalex.org/W6636907455","https://openalex.org/W6636915900","https://openalex.org/W6638854250","https://openalex.org/W6654026044","https://openalex.org/W6738450646","https://openalex.org/W6759172513"],"related_works":["https://openalex.org/W2100947578","https://openalex.org/W2161008081","https://openalex.org/W1555832326","https://openalex.org/W4298186509","https://openalex.org/W2556702969","https://openalex.org/W217221262","https://openalex.org/W611030372","https://openalex.org/W2000748181","https://openalex.org/W2971810784","https://openalex.org/W2953749697"],"abstract_inverted_index":{"Irregular":[0],"spellings":[1],"in":[2,80,101,140],"clinical":[3,24,81,141],"free":[4],"text":[5,25],"present":[6],"challenges":[7],"to":[8,32,54,64,134],"natural":[9],"language":[10],"processing.":[11],"A":[12],"number":[13],"of":[14,23,35,58,69,99,114,138],"spelling":[15,21,43,103],"correction":[16,22],"tools":[17],"exist,":[18],"but":[19],"automated":[20],"is":[26],"not":[27],"a":[28,41,87,95,126],"routine":[29],"practice":[30],"due":[31],"the":[33],"risk":[34],"introducing":[36],"new":[37],"errors.":[38],"We":[39],"developed":[40],"novel":[42],"analysis":[44],"application":[45,85,124],"that":[46,68,75,109,121],"combines":[47],"Word2Vec":[48],"and":[49,72,116,132],"Levenshtein":[50],"Edit":[51],"Distance":[52],"Constraints":[53],"identify":[55],"variant":[56],"forms":[57],"words.":[59],"The":[60,83],"use":[61],"case":[62],"applied":[63],"this":[65,122],"study":[66],"was":[67],"discovering":[70],"herbal":[71],"dietary":[73],"supplements":[74],"interact":[76],"with":[77],"prescription":[78],"medications":[79],"text.":[82,142],"prototype":[84,123],"processed":[86],"large":[88],"corpus":[89],"(approximately":[90],"1.6":[91],"million":[92],"records),":[93],"achieving":[94],"positive":[96,111],"predictive":[97,112],"value":[98],"0.9322,":[100],"identifying":[102],"variants,":[104],"outperforming":[105],"two":[106],"baseline":[107],"methods":[108],"achieved":[110],"values":[113],"0.0348":[115],"0.0067.":[117],"Our":[118],"findings":[119],"suggest":[120],"provides":[125],"more":[127],"efficient":[128],"method":[129],"for":[130],"researchers":[131],"clinicians":[133],"find":[135],"valid":[136],"misspellings":[137],"terms":[139]},"counts_by_year":[],"updated_date":"2026-02-24T19:35:01.260952","created_date":"2025-10-10T00:00:00"}
