{"id":"https://openalex.org/W4406459053","doi":"https://doi.org/10.1109/bigdata62323.2024.10825160","title":"Structured Extraction of Real World Medical Knowledge using LLMs for Summarization and Search","display_name":"Structured Extraction of Real World Medical Knowledge using LLMs for Summarization and Search","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406459053","doi":"https://doi.org/10.1109/bigdata62323.2024.10825160"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825160","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100728163","display_name":"Edward Kim","orcid":"https://orcid.org/0000-0002-0781-5531"},"institutions":[{"id":"https://openalex.org/I119215111","display_name":"AcademyHealth","ror":"https://ror.org/03zx0nf33","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I119215111"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Edward Kim","raw_affiliation_strings":["RespondHealth,Washington, DC,USA"],"affiliations":[{"raw_affiliation_string":"RespondHealth,Washington, DC,USA","institution_ids":["https://openalex.org/I119215111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115615977","display_name":"Manil Shrestha","orcid":null},"institutions":[{"id":"https://openalex.org/I119215111","display_name":"AcademyHealth","ror":"https://ror.org/03zx0nf33","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I119215111"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Manil Shrestha","raw_affiliation_strings":["RespondHealth,Washington, DC,USA"],"affiliations":[{"raw_affiliation_string":"RespondHealth,Washington, DC,USA","institution_ids":["https://openalex.org/I119215111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113523272","display_name":"Richard Foty","orcid":null},"institutions":[{"id":"https://openalex.org/I119215111","display_name":"AcademyHealth","ror":"https://ror.org/03zx0nf33","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I119215111"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard Foty","raw_affiliation_strings":["RespondHealth,Washington, DC,USA"],"affiliations":[{"raw_affiliation_string":"RespondHealth,Washington, DC,USA","institution_ids":["https://openalex.org/I119215111"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115615978","display_name":"Tom DeLay","orcid":null},"institutions":[{"id":"https://openalex.org/I119215111","display_name":"AcademyHealth","ror":"https://ror.org/03zx0nf33","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I119215111"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tom DeLay","raw_affiliation_strings":["RespondHealth,Washington, DC,USA"],"affiliations":[{"raw_affiliation_string":"RespondHealth,Washington, DC,USA","institution_ids":["https://openalex.org/I119215111"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112407211","display_name":"Vicki Seyfert\u2010Margolis","orcid":null},"institutions":[{"id":"https://openalex.org/I119215111","display_name":"AcademyHealth","ror":"https://ror.org/03zx0nf33","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I119215111"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vicki Seyfert-Margolis","raw_affiliation_strings":["RespondHealth,Washington, DC,USA"],"affiliations":[{"raw_affiliation_string":"RespondHealth,Washington, DC,USA","institution_ids":["https://openalex.org/I119215111"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100728163"],"corresponding_institution_ids":["https://openalex.org/I119215111"],"apc_list":null,"apc_paid":null,"fwci":1.2322,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.80160787,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3421","last_page":"3430"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.9392865896224976},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6117960810661316},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.5025882720947266},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4902525246143341},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3558085560798645},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.0698406994342804},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.06580361723899841}],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.9392865896224976},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6117960810661316},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.5025882720947266},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4902525246143341},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3558085560798645},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0698406994342804},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.06580361723899841}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825160","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W1034019924","https://openalex.org/W1934602670","https://openalex.org/W2103017472","https://openalex.org/W2136410628","https://openalex.org/W2142016317","https://openalex.org/W2159583324","https://openalex.org/W2169099542","https://openalex.org/W2170146596","https://openalex.org/W2346452181","https://openalex.org/W2396881363","https://openalex.org/W2404369708","https://openalex.org/W2537679995","https://openalex.org/W2572147858","https://openalex.org/W2767891136","https://openalex.org/W2789244308","https://openalex.org/W2896457183","https://openalex.org/W2911489562","https://openalex.org/W2915623326","https://openalex.org/W2946102094","https://openalex.org/W2951146425","https://openalex.org/W2963716420","https://openalex.org/W2964221236","https://openalex.org/W2970771982","https://openalex.org/W2974184401","https://openalex.org/W3003265726","https://openalex.org/W3010336026","https://openalex.org/W3034383590","https://openalex.org/W3046375318","https://openalex.org/W3087028093","https://openalex.org/W3106224367","https://openalex.org/W4220735656","https://openalex.org/W4224997359","https://openalex.org/W4244330657","https://openalex.org/W4292779060","https://openalex.org/W4297253404","https://openalex.org/W4322627064","https://openalex.org/W4379986648","https://openalex.org/W4385245566","https://openalex.org/W4385573087","https://openalex.org/W4385848332","https://openalex.org/W4388615774","https://openalex.org/W4395686609","https://openalex.org/W4402671441","https://openalex.org/W4403579106","https://openalex.org/W4403883673","https://openalex.org/W4404783220","https://openalex.org/W6632766574","https://openalex.org/W6704842505","https://openalex.org/W6713634263","https://openalex.org/W6731801563","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6778883912","https://openalex.org/W6865225510","https://openalex.org/W6873784770","https://openalex.org/W6873991619","https://openalex.org/W6922523504"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2366403280","https://openalex.org/W1495108544","https://openalex.org/W2091301346","https://openalex.org/W3148229873","https://openalex.org/W4389760904","https://openalex.org/W2150160875","https://openalex.org/W2351187795"],"abstract_inverted_index":{"Creation":[0],"and":[1,17,32,39,78,85,93,113,132,273,305],"curation":[2,114],"of":[3,19,34,50,61,97,115,159,166,195,209,228,266,269,281,310,330],"knowledge":[4,118,134,271],"graphs":[5,135,272],"at":[6],"scale":[7],"can":[8],"be":[9,144],"used":[10,74],"to":[11,128,143,169,179,184,193,317,322],"exponentially":[12],"accelerate":[13],"the":[14,51,59,98,111,154,181,188,196,203,207,226,259,267,328],"discovery,":[15],"matching,":[16],"analysis":[18,33],"diseases":[20],"in":[21,53,58,75,130,187,202,216,225,255,313,319],"real-world":[22,332],"data.":[23],"While":[24],"disease":[25,38,72,91,100,254],"ontologies":[26,171],"are":[27,70,102],"useful":[28],"for":[29,141,240,276,291],"annotation,":[30],"integration,":[31],"biological":[35],"data,":[36],"codified":[37,249],"procedure":[40],"categories":[41],"e.g.":[42],"SNOMED-CT,":[43,175],"ICD10,":[44],"CPT,":[45],"etc.":[46,178],"rarely":[47],"capture":[48],"all":[49],"nuances":[52],"a":[54,95,116,164,217,237,241,282,314,331,343],"patient":[55,117,229,238],"condition":[56],"or,":[57],"case":[60,233],"rare":[62,242,344],"disease,":[63,345],"may":[64],"not":[65],"even":[66],"exist.":[67],"Furthermore,":[68],"there":[69],"multiple":[71],"definitions":[73],"data":[76,142],"sources":[77],"publications,":[79],"each":[80],"having":[81],"its":[82],"own":[83],"structure":[84],"hierarchy.":[86],"Mapping":[87],"between":[88],"ontologies,":[89],"finding":[90],"clusters,":[92],"building":[94],"representation":[96],"chosen":[99],"area":[101,227],"resource-intensive,":[103],"often":[104],"requiring":[105],"significant":[106],"human":[107],"capital.":[108],"We":[109,162],"propose":[110],"creation":[112],"graph":[119],"utilizing":[120],"large":[121],"language":[122,138,148],"model":[123],"extraction":[124,215,303],"techniques.":[125],"In":[126,231,258],"order":[127],"expand":[129],"volume":[131],"scale,":[133],"with":[136,220,287,342],"generalized":[137],"capability":[139],"allow":[140],"extracted":[145,182],"using":[146],"natural":[147],"rather":[149],"than":[150],"being":[151],"constrained":[152],"by":[153],"exact":[155],"terminology":[156],"or":[157],"hierarchy":[158],"existing":[160,170],"ontologies.":[161],"develop":[163],"method":[165,265,334],"mapping":[167,320],"back":[168],"such":[172],"as":[173,250,294],"MeSH,":[174],"RxNORM,":[176],"HPO,":[177],"ground":[180,296,348],"entities":[183,186],"known":[185],"medical":[189],"community.We":[190],"have":[191],"access":[192],"one":[194],"largest":[197],"ambulatory":[198],"care":[199],"EHR":[200],"databases":[201],"country.":[204],"To":[205],"demonstrate":[206],"effectiveness":[208],"our":[210,214,264,295,300],"method,":[211],"we":[212,235,262,298,326],"benchmark":[213],"test":[218],"set":[219],"over":[221],"33.6M":[222],"unique":[223],"patients,":[224],"search.":[230],"this":[232],"study,":[234],"perform":[236],"search":[239],"disease:":[243],"Dravet":[244,246,292],"syndrome.":[245],"syndrome":[247,293],"was":[248],"an":[251,307],"ICD10":[252,289],"recognizable":[253],"October":[256],"2020.":[257],"following":[260],"research,":[261],"describe":[263],"construction":[268],"patient-specific":[270],"subsequent":[274],"searches":[275],"patients":[277,286,312,321,341],"who":[278],"exhibit":[279],"symptoms":[280],"particular":[283],"disease.":[284],"Using":[285],"confirmed":[288],"codes":[290],"truth,":[297],"utilize":[299],"LLM-based":[301],"entity":[302],"techniques":[304],"formalize":[306],"algorithmic":[308],"way":[309],"characterizing":[311],"grounded":[315],"ontology":[316],"assist":[318],"specific":[323],"diseases.":[324],"Finally,":[325],"present":[327],"results":[329],"discovery":[333],"on":[335],"Beta-propeller":[336],"protein-associated":[337],"neurodegeneration":[338],"(BPAN),":[339],"identifying":[340],"where":[346],"no":[347],"truth":[349],"currently":[350],"exists.":[351]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
