{"id":"https://openalex.org/W763180640","doi":"https://doi.org/10.3233/978-1-61499-538-8-175","title":"Taming Big Data: An Information Extraction Strategy for Large Clinical Text Corpora","display_name":"Taming Big Data: An Information Extraction Strategy for Large Clinical Text Corpora","publication_year":2015,"publication_date":"2015-01-01","ids":{"openalex":"https://openalex.org/W763180640","doi":"https://doi.org/10.3233/978-1-61499-538-8-175","mag":"763180640","pmid":"https://pubmed.ncbi.nlm.nih.gov/26152985"},"language":"en","primary_location":{"id":"doi:10.3233/978-1-61499-538-8-175","is_oa":false,"landing_page_url":"https://doi.org/10.3233/978-1-61499-538-8-175","pdf_url":null,"source":{"id":"https://openalex.org/S4210179765","display_name":"Studies in health technology and informatics","issn_l":"0926-9630","issn":["0926-9630","1879-8365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in Health Technology and Informatics","raw_type":"book-chapter"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005755665","display_name":"Adi V. Gundlapalli","orcid":"https://orcid.org/0000-0002-7423-7903"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]},{"id":"https://openalex.org/I4210089864","display_name":"VA Salt Lake City Healthcare System","ror":"https://ror.org/007fyq698","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1322918889","https://openalex.org/I2799886695","https://openalex.org/I4210089864","https://openalex.org/I4210113540"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Gundlapalli Adi V.","raw_affiliation_strings":["VA Salt Lake City Health Care System and University of Utah, Salt Lake City, UT"],"affiliations":[{"raw_affiliation_string":"VA Salt Lake City Health Care System and University of Utah, Salt Lake City, UT","institution_ids":["https://openalex.org/I4210089864","https://openalex.org/I223532165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007564242","display_name":"Guy Divita","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089864","display_name":"VA Salt Lake City Healthcare System","ror":"https://ror.org/007fyq698","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1322918889","https://openalex.org/I2799886695","https://openalex.org/I4210089864","https://openalex.org/I4210113540"]},{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Divita Guy","raw_affiliation_strings":["VA Salt Lake City Health Care System and University of Utah, Salt Lake City, UT"],"affiliations":[{"raw_affiliation_string":"VA Salt Lake City Health Care System and University of Utah, Salt Lake City, UT","institution_ids":["https://openalex.org/I4210089864","https://openalex.org/I223532165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009323063","display_name":"Marjorie E. Carter","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089864","display_name":"VA Salt Lake City Healthcare System","ror":"https://ror.org/007fyq698","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1322918889","https://openalex.org/I2799886695","https://openalex.org/I4210089864","https://openalex.org/I4210113540"]},{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carter Marjorie E.","raw_affiliation_strings":["VA Salt Lake City Health Care System and University of Utah, Salt Lake City, UT"],"affiliations":[{"raw_affiliation_string":"VA Salt Lake City Health Care System and University of Utah, Salt Lake City, UT","institution_ids":["https://openalex.org/I4210089864","https://openalex.org/I223532165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034658782","display_name":"Andrew Redd","orcid":"https://orcid.org/0000-0002-6149-2438"},"institutions":[{"id":"https://openalex.org/I4210089864","display_name":"VA Salt Lake City Healthcare System","ror":"https://ror.org/007fyq698","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1322918889","https://openalex.org/I2799886695","https://openalex.org/I4210089864","https://openalex.org/I4210113540"]},{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Redd Andrew","raw_affiliation_strings":["VA Salt Lake City Health Care System and University of Utah, Salt Lake City, UT"],"affiliations":[{"raw_affiliation_string":"VA Salt Lake City Health Care System and University of Utah, Salt Lake City, UT","institution_ids":["https://openalex.org/I4210089864","https://openalex.org/I223532165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078072232","display_name":"Matthew H. Samore","orcid":"https://orcid.org/0000-0002-4862-9196"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]},{"id":"https://openalex.org/I4210089864","display_name":"VA Salt Lake City Healthcare System","ror":"https://ror.org/007fyq698","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1322918889","https://openalex.org/I2799886695","https://openalex.org/I4210089864","https://openalex.org/I4210113540"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samore Matthew H.","raw_affiliation_strings":["VA Salt Lake City Health Care System and University of Utah, Salt Lake City, UT"],"affiliations":[{"raw_affiliation_string":"VA Salt Lake City Health Care System and University of Utah, Salt Lake City, UT","institution_ids":["https://openalex.org/I4210089864","https://openalex.org/I223532165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076156293","display_name":"Kalpana Gupta","orcid":"https://orcid.org/0000-0002-6766-3576"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gupta Kalpana","raw_affiliation_strings":["VA Boston Health Care System and Boston University, Boston, MA"],"affiliations":[{"raw_affiliation_string":"VA Boston Health Care System and Boston University, Boston, MA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031733730","display_name":"Barbara W. Trautner","orcid":"https://orcid.org/0000-0001-7986-9099"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Trautner Barbara","raw_affiliation_strings":["VA Houston Health Care System and Baylor College of Medicine, Houston, TX"],"affiliations":[{"raw_affiliation_string":"VA Houston Health Care System and Baylor College of Medicine, Houston, TX","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5005755665"],"corresponding_institution_ids":["https://openalex.org/I223532165","https://openalex.org/I4210089864"],"apc_list":null,"apc_paid":null,"fwci":0.1714,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.56130705,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"213","issue":null,"first_page":"175","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6038894057273865},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5724415183067322},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.568278431892395},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.562379777431488},{"id":"https://openalex.org/keywords/yield","display_name":"Yield (engineering)","score":0.5209882855415344},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.49115511775016785},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4783613085746765},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4769538640975952},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.46400442719459534},{"id":"https://openalex.org/keywords/medical-record","display_name":"Medical record","score":0.45055320858955383},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.31466516852378845},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.29428547620773315},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2553079128265381},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.2508373260498047},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.193141907453537},{"id":"https://openalex.org/keywords/radiology","display_name":"Radiology","score":0.11581775546073914}],"concepts":[{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6038894057273865},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5724415183067322},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.568278431892395},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.562379777431488},{"id":"https://openalex.org/C134121241","wikidata":"https://www.wikidata.org/wiki/Q899301","display_name":"Yield (engineering)","level":2,"score":0.5209882855415344},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.49115511775016785},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4783613085746765},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4769538640975952},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.46400442719459534},{"id":"https://openalex.org/C195910791","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Medical record","level":2,"score":0.45055320858955383},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.31466516852378845},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.29428547620773315},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2553079128265381},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2508373260498047},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.193141907453537},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.11581775546073914},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D006703","descriptor_name":"Ill-Housed Persons","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006703","descriptor_name":"Ill-Housed Persons","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006703","descriptor_name":"Ill-Housed Persons","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D035843","descriptor_name":"Biomedical Research","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D035843","descriptor_name":"Biomedical Research","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D035843","descriptor_name":"Biomedical Research","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D057286","descriptor_name":"Electronic Health Records","qualifier_ui":"Q000458","qualifier_name":"organization & administration","is_major_topic":false},{"descriptor_ui":"D057286","descriptor_name":"Electronic Health Records","qualifier_ui":"Q000458","qualifier_name":"organization & administration","is_major_topic":false},{"descriptor_ui":"D057286","descriptor_name":"Electronic Health Records","qualifier_ui":"Q000458","qualifier_name":"organization & administration","is_major_topic":false},{"descriptor_ui":"D062885","descriptor_name":"Urinary Catheters","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D062885","descriptor_name":"Urinary Catheters","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D062885","descriptor_name":"Urinary Catheters","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.3233/978-1-61499-538-8-175","is_oa":false,"landing_page_url":"https://doi.org/10.3233/978-1-61499-538-8-175","pdf_url":null,"source":{"id":"https://openalex.org/S4210179765","display_name":"Studies in health technology and informatics","issn_l":"0926-9630","issn":["0926-9630","1879-8365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in Health Technology and Informatics","raw_type":"book-chapter"},{"id":"pmid:26152985","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/26152985","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in health technology and informatics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2114388055","https://openalex.org/W2140213838","https://openalex.org/W2169818249","https://openalex.org/W2408485528","https://openalex.org/W2415646068"],"related_works":["https://openalex.org/W4390608645","https://openalex.org/W4405901645","https://openalex.org/W4394895745","https://openalex.org/W4247566972","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351","https://openalex.org/W4206777497","https://openalex.org/W2910064364","https://openalex.org/W4200136508"],"abstract_inverted_index":{"Concepts":[0],"of":[1,22,44,51,96,108,115],"interest":[2],"for":[3,35,93,118],"clinical":[4,13,45,119],"and":[5,38,61,74,106,122],"research":[6],"purposes":[7],"are":[8],"not":[9],"uniformly":[10],"distributed":[11],"in":[12,16,56,85],"text":[14],"available":[15],"electronic":[17],"medical":[18],"records.":[19],"The":[20],"purpose":[21],"our":[23],"study":[24],"was":[25],"to":[26,30,112],"identify":[27],"filtering":[28,102],"techniques":[29],"select":[31],"'high":[32,52],"yield'":[33,53],"documents":[34],"increased":[36],"efficacy":[37],"throughput.":[39],"Using":[40],"two":[41,57],"large":[42],"corpora":[43],"text,":[46],"we":[47],"demonstrate":[48],"the":[49,67,97],"identification":[50],"document":[54],"sets":[55],"unrelated":[58],"domains:":[59],"homelessness":[60],"indwelling":[62],"urinary":[63,79],"catheters.":[64],"For":[65,78],"homelessness,":[66],"high":[68,98],"yield":[69,99],"set":[70],"includes":[71],"homeless":[72],"program":[73],"social":[75],"work":[76],"notes.":[77],"catheters,":[80],"concepts":[81,117],"were":[82],"more":[83],"prevalent":[84],"notes":[86,91],"from":[87],"hospitalized":[88],"patients;":[89],"nursing":[90],"accounted":[92],"a":[94],"majority":[95],"set.":[100],"This":[101],"will":[103],"enable":[104],"customization":[105],"refining":[107],"information":[109],"extraction":[110,114],"pipelines":[111],"facilitate":[113],"relevant":[116],"decision":[120],"support":[121],"other":[123],"uses.":[124]},"counts_by_year":[{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
