{"id":"https://openalex.org/W4410568503","doi":"https://doi.org/10.32604/cmc.2025.063560","title":"Large Language Model in Healthcare for the Prediction of Genetic Variants from Unstructured Text Medicine Data Using Natural Language Processing","display_name":"Large Language Model in Healthcare for the Prediction of Genetic Variants from Unstructured Text Medicine Data Using Natural Language Processing","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4410568503","doi":"https://doi.org/10.32604/cmc.2025.063560"},"language":"en","primary_location":{"id":"doi:10.32604/cmc.2025.063560","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.063560","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.32604/cmc.2025.063560","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101850517","display_name":"Noor Ayesha","orcid":"https://orcid.org/0000-0003-1439-3418"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Noor Ayesha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047043390","display_name":"Muhammad Mujahid","orcid":"https://orcid.org/0009-0005-5751-5528"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muhammad Mujahid","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115824704","display_name":"Abeer Rashad Mirdad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abeer Rashad Mirdad","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051374204","display_name":"Faten S. Alamri","orcid":"https://orcid.org/0000-0003-0312-8731"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Faten S. Alamri","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101969685","display_name":"Amjad Rehman Khan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Amjad R. Khan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101850517"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.4259,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.92304497,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"84","issue":"1","first_page":"1883","last_page":"1899"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13937","display_name":"Genetics, Bioinformatics, and Biomedical Research","score":0.7785000205039978,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T13937","display_name":"Genetics, Bioinformatics, and Biomedical Research","score":0.7785000205039978,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.6897000074386597,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.6535000205039978,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6327773332595825},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6064158082008362},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.5663542151451111},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5318958759307861},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4998657703399658},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.29249995946884155},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.23903381824493408}],"concepts":[{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6327773332595825},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6064158082008362},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.5663542151451111},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5318958759307861},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4998657703399658},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.29249995946884155},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.23903381824493408}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.32604/cmc.2025.063560","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.063560","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.32604/cmc.2025.063560","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.063560","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7400000095367432,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W3215317537","https://openalex.org/W4312220150","https://openalex.org/W4362735179","https://openalex.org/W4384561707","https://openalex.org/W4385227045","https://openalex.org/W4386102168","https://openalex.org/W4388725043","https://openalex.org/W4390415052","https://openalex.org/W4391126287","https://openalex.org/W4391808470","https://openalex.org/W4391809309","https://openalex.org/W4391855109","https://openalex.org/W4393378068","https://openalex.org/W4400202790","https://openalex.org/W4400679572","https://openalex.org/W4401043206","https://openalex.org/W4401386758","https://openalex.org/W4401387006","https://openalex.org/W4401820448","https://openalex.org/W4401953966","https://openalex.org/W4403826725","https://openalex.org/W4404887887","https://openalex.org/W4406234152","https://openalex.org/W4406859636","https://openalex.org/W4407264242","https://openalex.org/W4407354885"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W3203889067","https://openalex.org/W3184725726","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W2378793138","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Large":[0],"language":[1,6,78],"models":[2],"(LLMs)":[3],"and":[4,15,19,57,66,76,107,143,157,211,214,244],"natural":[5],"processing":[7],"(NLP)":[8],"have":[9,129],"significant":[10],"promise":[11],"to":[12,53,59,173,191],"improve":[13],"efficiency":[14],"refine":[16],"healthcare":[17],"decision-making":[18],"clinical":[20,126,132,181],"results.":[21],"Numerous":[22],"domains,":[23],"including":[24,140],"healthcare,":[25,261],"are":[26,150],"rapidly":[27],"adopting":[28],"LLMs":[29],"for":[30,205,219],"the":[31,74,85,88,114,117,131,175,180,192,195,230,269],"classification":[32],"of":[33,79,87,109,116,154,169,224,229,256],"biomedical":[34],"textual":[35,125,133],"data":[36,134],"in":[37,69,98,260,268],"medical":[38,80],"research.":[39],"The":[40,147,166,227,249],"LLM":[41,258],"can":[42],"derive":[43],"insights":[44],"from":[45,236],"intricate,":[46],"extensive,":[47],"unstructured":[48],"training":[49],"data.":[50,127],"Variants":[51],"need":[52],"be":[54],"accurately":[55],"identified":[56],"classified":[58],"advance":[60],"genetic":[61,176],"research,":[62],"provide":[63,252],"individualized":[64],"treatment,":[65],"assist":[67],"physicians":[68],"making":[70],"better":[71],"choices.":[72],"However,":[73],"sophisticated":[75],"perplexing":[77],"reports":[81],"is":[82,172],"often":[83],"beyond":[84],"capabilities":[86],"devices":[89],"we":[90],"now":[91],"utilize.":[92],"Such":[93],"an":[94],"approach":[95],"may":[96],"result":[97],"incorrect":[99],"diagnoses,":[100],"which":[101],"could":[102],"affect":[103],"a":[104,184,215,253],"patient\u2019s":[105],"prognosis":[106],"course":[108],"therapy.":[110],"This":[111],"study":[112,171],"evaluated":[113],"efficacy":[115],"proposed":[118,231],"model":[119,198,238],"by":[120],"looking":[121],"at":[122],"publicly":[123],"accessible":[124],"We":[128],"cleaned":[130],"using":[135,152,183,208,222],"various":[136],"text":[137],"preprocessing":[138],"methods,":[139],"stemming,":[141],"tokenization,":[142],"stop":[144],"word":[145],"removal.":[146],"important":[148,167],"features":[149,210],"extracted":[151],"Bag":[153,223],"Words":[155,225],"(BoW)":[156],"Term":[158],"Frequency-Inverse":[159],"Document":[160],"Frequency":[161],"(TFIDF)":[162],"feature":[163],"engineering":[164],"methods.":[165],"motive":[168],"this":[170],"predict":[174],"variants":[177],"based":[178],"on":[179],"evidence":[182],"novel":[185],"method":[186],"with":[187,202,241,246],"minimal":[188],"error.":[189],"According":[190],"experimental":[193],"results,":[194],"random":[196],"forest":[197],"achieved":[199],"61%":[200],"accuracy":[201,213,228],"67%":[203],"precision":[204],"class":[206,220],"9":[207,221],"TFIDF":[209],"63%":[212],"73%":[216],"F1":[217],"score":[218],"features.":[226],"BERT":[232],"(Bidirectional":[233],"Encoder":[234],"Representations":[235],"Transformers)":[237],"was":[239],"70%":[240],"5-fold":[242],"cross-validation":[243],"71%":[245],"10-fold":[247],"cross-validation.":[248],"research":[250],"results":[251],"comprehensive":[254],"overview":[255],"current":[257],"methods":[259],"benefiting":[262],"academics":[263],"as":[264,266],"well":[265],"professionals":[267],"discipline.":[270]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
