{"id":"https://openalex.org/W3211844088","doi":"https://doi.org/10.1093/jamia/ocab243","title":"Natural language inference for curation of structured clinical registries from unstructured text","display_name":"Natural language inference for curation of structured clinical registries from unstructured text","publication_year":2021,"publication_date":"2021-10-26","ids":{"openalex":"https://openalex.org/W3211844088","doi":"https://doi.org/10.1093/jamia/ocab243","mag":"3211844088","pmid":"https://pubmed.ncbi.nlm.nih.gov/34791282"},"language":"en","primary_location":{"id":"doi:10.1093/jamia/ocab243","is_oa":false,"landing_page_url":"https://doi.org/10.1093/jamia/ocab243","pdf_url":null,"source":{"id":"https://openalex.org/S129839026","display_name":"Journal of the American Medical Informatics Association","issn_l":"1067-5027","issn":["1067-5027","1527-974X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the American Medical Informatics Association","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8714278","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032838430","display_name":"Bethany Percha","orcid":"https://orcid.org/0000-0003-0988-4183"},"institutions":[{"id":"https://openalex.org/I98704320","display_name":"Icahn School of Medicine at Mount Sinai","ror":"https://ror.org/04a9tmd77","country_code":"US","type":"education","lineage":["https://openalex.org/I1320796813","https://openalex.org/I98704320"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bethany Percha","raw_affiliation_strings":["Department of Genetics and Genomic Sciences, Icahn School of Medicine at Mount Sinai, New York, New York, USA","Department of Medicine, Icahn School of Medicine at Mount Sinai, New York, New York, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Genetics and Genomic Sciences, Icahn School of Medicine at Mount Sinai, New York, New York, USA","institution_ids":["https://openalex.org/I98704320"]},{"raw_affiliation_string":"Department of Medicine, Icahn School of Medicine at Mount Sinai, New York, New York, USA","institution_ids":["https://openalex.org/I98704320"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051173089","display_name":"Kereeti Pisapati","orcid":null},"institutions":[{"id":"https://openalex.org/I1320796813","display_name":"Mount Sinai Health System","ror":"https://ror.org/04kfn4587","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1320796813"]},{"id":"https://openalex.org/I98704320","display_name":"Icahn School of Medicine at Mount Sinai","ror":"https://ror.org/04a9tmd77","country_code":"US","type":"education","lineage":["https://openalex.org/I1320796813","https://openalex.org/I98704320"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kereeti Pisapati","raw_affiliation_strings":["Breast Surgical Oncology, Icahn School of Medicine at Mount Sinai, New York, New York, USA","Mount Sinai Innovation Partners, Mount Sinai Health System, New York, New York, USA","Tisch Cancer Institute, Icahn School of Medicine at Mount Sinai, New York, New York, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Breast Surgical Oncology, Icahn School of Medicine at Mount Sinai, New York, New York, USA","institution_ids":["https://openalex.org/I98704320"]},{"raw_affiliation_string":"Mount Sinai Innovation Partners, Mount Sinai Health System, New York, New York, USA","institution_ids":["https://openalex.org/I1320796813"]},{"raw_affiliation_string":"Tisch Cancer Institute, Icahn School of Medicine at Mount Sinai, New York, New York, USA","institution_ids":["https://openalex.org/I98704320"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024347376","display_name":"Cynthia Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I98704320","display_name":"Icahn School of Medicine at Mount Sinai","ror":"https://ror.org/04a9tmd77","country_code":"US","type":"education","lineage":["https://openalex.org/I1320796813","https://openalex.org/I98704320"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cynthia Gao","raw_affiliation_strings":["Department of Medicine, Icahn School of Medicine at Mount Sinai, New York, New York, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Medicine, Icahn School of Medicine at Mount Sinai, New York, New York, USA","institution_ids":["https://openalex.org/I98704320"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077808447","display_name":"Hank Schmidt","orcid":"https://orcid.org/0000-0001-6530-6324"},"institutions":[{"id":"https://openalex.org/I98704320","display_name":"Icahn School of Medicine at Mount Sinai","ror":"https://ror.org/04a9tmd77","country_code":"US","type":"education","lineage":["https://openalex.org/I1320796813","https://openalex.org/I98704320"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hank Schmidt","raw_affiliation_strings":["Breast Surgical Oncology, Icahn School of Medicine at Mount Sinai, New York, New York, USA","Tisch Cancer Institute, Icahn School of Medicine at Mount Sinai, New York, New York, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Breast Surgical Oncology, Icahn School of Medicine at Mount Sinai, New York, New York, USA","institution_ids":["https://openalex.org/I98704320"]},{"raw_affiliation_string":"Tisch Cancer Institute, Icahn School of Medicine at Mount Sinai, New York, New York, USA","institution_ids":["https://openalex.org/I98704320"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5032838430"],"corresponding_institution_ids":["https://openalex.org/I98704320"],"apc_list":{"value":3967,"currency":"USD","value_usd":3967},"apc_paid":null,"fwci":1.8189,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.88091226,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"29","issue":"1","first_page":"97","last_page":"108"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.3296000063419342,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.3296000063419342,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.14329999685287476,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1128000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7688865661621094},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6392807364463806},{"id":"https://openalex.org/keywords/data-curation","display_name":"Data curation","score":0.6088275909423828},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.580346941947937},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5691713094711304},{"id":"https://openalex.org/keywords/confusion","display_name":"Confusion","score":0.54857337474823},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.45426031947135925},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.45043256878852844},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3806651830673218},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3655080199241638},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3327915668487549},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.18269577622413635},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.15967023372650146}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7688865661621094},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6392807364463806},{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.6088275909423828},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.580346941947937},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5691713094711304},{"id":"https://openalex.org/C2781140086","wikidata":"https://www.wikidata.org/wiki/Q557945","display_name":"Confusion","level":2,"score":0.54857337474823},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.45426031947135925},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.45043256878852844},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3806651830673218},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3655080199241638},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3327915668487549},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.18269577622413635},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.15967023372650146},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C11171543","wikidata":"https://www.wikidata.org/wiki/Q41630","display_name":"Psychoanalysis","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012042","descriptor_name":"Registries","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012042","descriptor_name":"Registries","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012042","descriptor_name":"Registries","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012189","descriptor_name":"Retrospective Studies","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012189","descriptor_name":"Retrospective Studies","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012189","descriptor_name":"Retrospective Studies","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":3,"locations":[{"id":"doi:10.1093/jamia/ocab243","is_oa":false,"landing_page_url":"https://doi.org/10.1093/jamia/ocab243","pdf_url":null,"source":{"id":"https://openalex.org/S129839026","display_name":"Journal of the American Medical Informatics Association","issn_l":"1067-5027","issn":["1067-5027","1527-974X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the American Medical Informatics Association","raw_type":"journal-article"},{"id":"pmid:34791282","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34791282","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the American Medical Informatics Association : JAMIA","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:8714278","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8714278","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Am Med Inform Assoc","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:pubmedcentral.nih.gov:8714278","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8714278","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Am Med Inform Assoc","raw_type":"Text"},"sustainable_development_goals":[{"score":0.8299999833106995,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309658","display_name":"Icahn School of Medicine at Mount Sinai","ror":"https://ror.org/04a9tmd77"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2034280831","https://openalex.org/W2093274439","https://openalex.org/W2108258145","https://openalex.org/W2142100256","https://openalex.org/W2165698076","https://openalex.org/W2891469329","https://openalex.org/W2937423263","https://openalex.org/W2976369321","https://openalex.org/W3105625590","https://openalex.org/W3165345393","https://openalex.org/W4211148418"],"related_works":["https://openalex.org/W2964976023","https://openalex.org/W1982477181","https://openalex.org/W2403083015","https://openalex.org/W4285488523","https://openalex.org/W2011367623","https://openalex.org/W2701062589","https://openalex.org/W2734065904","https://openalex.org/W2027125286","https://openalex.org/W2049289069","https://openalex.org/W2809078847"],"abstract_inverted_index":{"OBJECTIVE:":[0],"Clinical":[1],"registries-structured":[2],"databases":[3],"of":[4,19,52,133,184,202,218,230,237],"demographic,":[5],"diagnosis,":[6],"and":[7,17,31,37,79,116,128,160,221],"treatment":[8],"information-play":[9],"vital":[10],"roles":[11],"in":[12,112,165,211],"retrospective":[13],"studies,":[14],"operational":[15],"planning,":[16],"assessment":[18],"patient":[20,101],"eligibility":[21],"for":[22,40,61,188],"research,":[23],"including":[24],"clinical":[25,166,213],"trials.":[26],"Registry":[27],"curation,":[28,232],"a":[29,58,97,185,194],"manual":[30],"time-intensive":[32],"process,":[33],"is":[34],"always":[35],"costly":[36],"often":[38],"impossible":[39],"rare":[41],"or":[42,181],"underfunded":[43],"diseases.":[44],"Our":[45],"goal":[46],"was":[47],"to":[48,76,82,149],"evaluate":[49],"the":[50,123,182,212,216,228,238],"feasibility":[51],"natural":[53,172],"language":[54,173],"inference":[55],"(NLI)":[56],"as":[57,153,155],"scalable":[59],"solution":[60],"registry":[62,90,190,231],"curation.":[63],"MATERIALS":[64],"AND":[65,169],"METHODS:":[66],"We":[67],"applied":[68,235],"five":[69],"state-of-the-art,":[70],"pretrained,":[71],"deep":[72],"learning-based":[73],"NLI":[74,107,197,207,224],"models":[75,108,225],"clinical,":[77],"laboratory,":[78],"pathology":[80],"notes":[81],"infer":[83],"information":[84],"about":[85],"43":[86,134],"different":[87,203],"breast":[88,102],"oncology":[89,103],"fields.":[91,118,135],"Model":[92],"inferences":[93,143],"were":[94],"evaluated":[95],"against":[96],"manually":[98],"curated,":[99],"7439":[100],"research":[104],"database.":[105],"RESULTS:":[106],"showed":[109],"considerable":[110],"variation":[111],"performance,":[113],"both":[114],"within":[115],"across":[117],"One":[119],"model,":[120],"ALBERT,":[121],"outperformed":[122],"others":[124],"(BART,":[125],"RoBERTa,":[126],"XLNet,":[127],"ELECTRA)":[129],"on":[130,158],"22":[131],"out":[132],"A":[136],"detailed":[137],"error":[138],"analysis":[139],"revealed":[140],"that":[141],"incorrect":[142],"primarily":[144],"arose":[145],"through":[146],"models'":[147],"tendency":[148],"misinterpret":[150],"historical":[151],"findings,":[152],"well":[154],"confusion":[156],"based":[157],"abbreviations":[159],"subtle":[161],"term":[162],"variants":[163],"common":[164],"text.":[167],"DISCUSSION":[168],"CONCLUSION:":[170],"Traditional":[171],"processing":[174],"methods":[175,208],"require":[176],"specially":[177],"annotated":[178],"training":[179],"sets":[180],"construction":[183],"separate":[186],"model":[187,198],"each":[189],"field.":[191],"In":[192],"contrast,":[193],"single":[195],"pretrained":[196],"can":[199],"curate":[200],"dozens":[201],"fields":[204],"simultaneously.":[205],"Surprisingly,":[206],"remain":[209],"unexplored":[210],"domain":[214],"outside":[215],"realm":[217],"shared":[219],"tasks":[220],"benchmarks.":[222],"Modern":[223],"could":[226],"increase":[227],"efficiency":[229],"even":[233],"when":[234],"\"out":[236],"box\"":[239],"with":[240],"no":[241],"additional":[242],"training.":[243]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
