{"id":"https://openalex.org/W3082806762","doi":"https://doi.org/10.1109/embc44109.2020.9175781","title":"Predicting the pathogenicity of protein coding mutations using Natural Language Processing","display_name":"Predicting the pathogenicity of protein coding mutations using Natural Language Processing","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3082806762","doi":"https://doi.org/10.1109/embc44109.2020.9175781","mag":"3082806762","pmid":"https://pubmed.ncbi.nlm.nih.gov/33019302"},"language":"en","primary_location":{"id":"doi:10.1109/embc44109.2020.9175781","is_oa":false,"landing_page_url":"https://doi.org/10.1109/embc44109.2020.9175781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 42nd Annual International Conference of the IEEE Engineering in Medicine &amp; Biology Society (EMBC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018741090","display_name":"Naeem Rehmat","orcid":null},"institutions":[{"id":"https://openalex.org/I201384688","display_name":"National University of Computer and Emerging Sciences","ror":"https://ror.org/003eyb898","country_code":"PK","type":"education","lineage":["https://openalex.org/I201384688"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Naeem Rehmat","raw_affiliation_strings":["Computational Biology Research Lab (cbrlab.org), National University of Computer & Emerging Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computational Biology Research Lab (cbrlab.org), National University of Computer & Emerging Sciences","institution_ids":["https://openalex.org/I201384688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063738440","display_name":"Hammad Farooq","orcid":"https://orcid.org/0000-0001-5691-298X"},"institutions":[{"id":"https://openalex.org/I201384688","display_name":"National University of Computer and Emerging Sciences","ror":"https://ror.org/003eyb898","country_code":"PK","type":"education","lineage":["https://openalex.org/I201384688"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Hammad Farooq","raw_affiliation_strings":["Computational Biology Research Lab (cbrlab.org), National University of Computer & Emerging Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computational Biology Research Lab (cbrlab.org), National University of Computer & Emerging Sciences","institution_ids":["https://openalex.org/I201384688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100785501","display_name":"Sanjay Kumar","orcid":"https://orcid.org/0000-0002-2667-1035"},"institutions":[{"id":"https://openalex.org/I201384688","display_name":"National University of Computer and Emerging Sciences","ror":"https://ror.org/003eyb898","country_code":"PK","type":"education","lineage":["https://openalex.org/I201384688"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Sanjay Kumar","raw_affiliation_strings":["Computational Biology Research Lab (cbrlab.org), National University of Computer & Emerging Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computational Biology Research Lab (cbrlab.org), National University of Computer & Emerging Sciences","institution_ids":["https://openalex.org/I201384688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112730299","display_name":"Sibt ul Hussain","orcid":null},"institutions":[{"id":"https://openalex.org/I201384688","display_name":"National University of Computer and Emerging Sciences","ror":"https://ror.org/003eyb898","country_code":"PK","type":"education","lineage":["https://openalex.org/I201384688"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Sibt ul Hussain","raw_affiliation_strings":["Recognition, Vision and Learning research group (ReVeaL), National University of Computer & Emerging Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Recognition, Vision and Learning research group (ReVeaL), National University of Computer & Emerging Sciences","institution_ids":["https://openalex.org/I201384688"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074736458","display_name":"Hammad Naveed","orcid":"https://orcid.org/0000-0002-1867-974X"},"institutions":[{"id":"https://openalex.org/I201384688","display_name":"National University of Computer and Emerging Sciences","ror":"https://ror.org/003eyb898","country_code":"PK","type":"education","lineage":["https://openalex.org/I201384688"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Hammad Naveed","raw_affiliation_strings":["Computational Biology Research Lab (cbrlab.org), National University of Computer & Emerging Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computational Biology Research Lab (cbrlab.org), National University of Computer & Emerging Sciences","institution_ids":["https://openalex.org/I201384688"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I201384688"],"apc_list":null,"apc_paid":null,"fwci":0.705,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.74477601,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"2020","issue":null,"first_page":"5842","last_page":"5846"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7741786241531372},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7236261963844299},{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.6308261752128601},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5920217633247375},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5723183155059814},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4981567859649658},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.49770453572273254},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.4921833574771881},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.4851406514644623},{"id":"https://openalex.org/keywords/pathogenicity","display_name":"Pathogenicity","score":0.42711806297302246},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.42362383008003235},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.09769579768180847}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7741786241531372},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7236261963844299},{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.6308261752128601},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5920217633247375},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5723183155059814},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4981567859649658},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.49770453572273254},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.4921833574771881},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.4851406514644623},{"id":"https://openalex.org/C64502627","wikidata":"https://www.wikidata.org/wiki/Q170065","display_name":"Pathogenicity","level":2,"score":0.42711806297302246},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.42362383008003235},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.09769579768180847},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0},{"id":"https://openalex.org/C89423630","wikidata":"https://www.wikidata.org/wiki/Q7193","display_name":"Microbiology","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009154","descriptor_name":"Mutation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009154","descriptor_name":"Mutation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009154","descriptor_name":"Mutation","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D014774","descriptor_name":"Virulence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D014774","descriptor_name":"Virulence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D014774","descriptor_name":"Virulence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1109/embc44109.2020.9175781","is_oa":false,"landing_page_url":"https://doi.org/10.1109/embc44109.2020.9175781","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 42nd Annual International Conference of the IEEE Engineering in Medicine &amp; Biology Society (EMBC)","raw_type":"proceedings-article"},{"id":"pmid:33019302","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33019302","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annual International Conference of the IEEE Engineering in Medicine and Biology Society. IEEE Engineering in Medicine and Biology Society. Annual International Conference","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7699999809265137,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W1602667807","https://openalex.org/W1922733886","https://openalex.org/W1964670939","https://openalex.org/W1970139872","https://openalex.org/W1970413157","https://openalex.org/W2041993682","https://openalex.org/W2054231318","https://openalex.org/W2059145105","https://openalex.org/W2072572998","https://openalex.org/W2089335658","https://openalex.org/W2101412130","https://openalex.org/W2102514878","https://openalex.org/W2111326065","https://openalex.org/W2114029728","https://openalex.org/W2114031931","https://openalex.org/W2114850508","https://openalex.org/W2117692326","https://openalex.org/W2122732537","https://openalex.org/W2124789022","https://openalex.org/W2129952088","https://openalex.org/W2132073122","https://openalex.org/W2143210482","https://openalex.org/W2144211451","https://openalex.org/W2150575159","https://openalex.org/W2151529283","https://openalex.org/W2154139219","https://openalex.org/W2156078931","https://openalex.org/W2159987716","https://openalex.org/W2174602966","https://openalex.org/W2195303995","https://openalex.org/W2439991713","https://openalex.org/W2527896214","https://openalex.org/W2535426958","https://openalex.org/W2558715006","https://openalex.org/W2582236637","https://openalex.org/W2583025663","https://openalex.org/W2583911935","https://openalex.org/W2613613490","https://openalex.org/W2614443510","https://openalex.org/W2624797932","https://openalex.org/W2752636837","https://openalex.org/W2950945059","https://openalex.org/W3104059174","https://openalex.org/W6664160496","https://openalex.org/W6675264585","https://openalex.org/W6679455070","https://openalex.org/W6685387098","https://openalex.org/W6728098186","https://openalex.org/W6730177376","https://openalex.org/W6732732085","https://openalex.org/W6738297074","https://openalex.org/W6786012642"],"related_works":["https://openalex.org/W2937631562","https://openalex.org/W3195168932","https://openalex.org/W1996541855","https://openalex.org/W3031263788","https://openalex.org/W3214642209","https://openalex.org/W4308949089","https://openalex.org/W2146948107","https://openalex.org/W4211165872","https://openalex.org/W4226048468","https://openalex.org/W3101260801"],"abstract_inverted_index":{"DNA-Sequencing":[0],"of":[1,7,17,48,92,135,154],"tumor":[2,24],"cells":[3],"has":[4],"revealed":[5],"thousands":[6],"genetic":[8],"mutations.":[9],"However,":[10],"cancer":[11],"is":[12,29,33,41,187],"caused":[13],"by":[14],"only":[15],"some":[16],"them.":[18],"Identifying":[19],"mutations":[20],"that":[21,70,144],"contribute":[22],"to":[23,61,74,107,111,184],"growth":[25],"from":[26],"neutral":[27,116],"ones":[28],"extremely":[30],"challenging":[31],"and":[32,44,50,65,84,100,125],"currently":[34],"carried":[35],"out":[36],"manually.":[37],"This":[38],"manual":[39,180],"annotation":[40],"very":[42],"cumbersome":[43],"expensive":[45],"in":[46,150,178],"terms":[47],"time":[49],"money.":[51],"In":[52],"this":[53],"study,":[54],"we":[55],"introduce":[56],"a":[57],"novel":[58],"method":[59,79],"\"NLP-SNPPred\"":[60],"read":[62],"scientific":[63],"literature":[64,83],"learn":[66],"the":[67,81,93,113,136,168],"implicit":[68],"features":[69],"cause":[71],"certain":[72],"variations":[73,157],"be":[75,147],"pathogenic.":[76],"Precisely,":[77],"our":[78],"ingests":[80],"bio-medical":[82],"produces":[85],"its":[86],"vector":[87],"representation":[88],"via":[89],"exploiting":[90],"state":[91,134],"art":[94,137],"NLP":[95,145],"methods":[96,175],"like":[97],"sent2vec,":[98],"word2vec":[99],"tf-idf.":[101],"These":[102],"representations":[103],"are":[104],"then":[105],"fed":[106],"machine":[108,173],"learning":[109,174],"predictors":[110],"identify":[112],"pathogenic":[114],"versus":[115],"variations.":[117],"Our":[118,141],"best":[119],"model":[120],"(NLPSNPPred)":[121],"trained":[122],"on":[123,127],"OncoKB":[124],"evaluated":[126],"several":[128],"publicly":[129],"available":[130,188],"benchmark":[131],"datasets,":[132],"outperformed":[133],"function":[138],"prediction":[139],"methods.":[140],"results":[142],"show":[143],"can":[146,176],"used":[148],"effectively":[149],"predicting":[151],"functional":[152],"impact":[153],"protein":[155],"coding":[156],"with":[158,172],"minimal":[159],"complementary":[160],"biological":[161,165],"features.":[162],"Moreover,":[163],"encoding":[164],"knowledge":[166],"into":[167],"right":[169],"representations,":[170],"combined":[171],"help":[177],"automating":[179],"efforts.":[181],"A":[182],"free":[183],"use":[185],"web-server":[186],"at":[189],"http://www.nlp-snppred.cbrlab.org.":[190]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
