{"id":"https://openalex.org/W2747014888","doi":"https://doi.org/10.1145/3107411.3108193","title":"Protein Classification using Modified <i>N-Gram</i> and <i>Skip-Gram</i> Models","display_name":"Protein Classification using Modified <i>N-Gram</i> and <i>Skip-Gram</i> Models","publication_year":2017,"publication_date":"2017-08-20","ids":{"openalex":"https://openalex.org/W2747014888","doi":"https://doi.org/10.1145/3107411.3108193","mag":"2747014888"},"language":"en","primary_location":{"id":"doi:10.1145/3107411.3108193","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3107411.3108193","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th ACM International Conference on Bioinformatics, Computational Biology,and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001989548","display_name":"SM Ashiqul Islam","orcid":null},"institutions":[{"id":"https://openalex.org/I157394403","display_name":"Baylor University","ror":"https://ror.org/005781934","country_code":"US","type":"education","lineage":["https://openalex.org/I157394403"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"SM Ashiqul Islam","raw_affiliation_strings":["Baylor University, Waco, TX, USA"],"affiliations":[{"raw_affiliation_string":"Baylor University, Waco, TX, USA","institution_ids":["https://openalex.org/I157394403"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061939858","display_name":"Christopher M. Kearney","orcid":"https://orcid.org/0000-0002-1945-7815"},"institutions":[{"id":"https://openalex.org/I157394403","display_name":"Baylor University","ror":"https://ror.org/005781934","country_code":"US","type":"education","lineage":["https://openalex.org/I157394403"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Michel Kearney","raw_affiliation_strings":["Baylor University, Waco, TX, USA"],"affiliations":[{"raw_affiliation_string":"Baylor University, Waco, TX, USA","institution_ids":["https://openalex.org/I157394403"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011177406","display_name":"Ankan Choudhury","orcid":"https://orcid.org/0000-0003-2060-8884"},"institutions":[{"id":"https://openalex.org/I157394403","display_name":"Baylor University","ror":"https://ror.org/005781934","country_code":"US","type":"education","lineage":["https://openalex.org/I157394403"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ankan Choudhury","raw_affiliation_strings":["Baylor University, Waco, TX, USA"],"affiliations":[{"raw_affiliation_string":"Baylor University, Waco, TX, USA","institution_ids":["https://openalex.org/I157394403"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052536656","display_name":"Erich J. Baker","orcid":"https://orcid.org/0000-0002-7798-5704"},"institutions":[{"id":"https://openalex.org/I157394403","display_name":"Baylor University","ror":"https://ror.org/005781934","country_code":"US","type":"education","lineage":["https://openalex.org/I157394403"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Erich J. Baker","raw_affiliation_strings":["Baylor University, Waco, TX, USA"],"affiliations":[{"raw_affiliation_string":"Baylor University, Waco, TX, USA","institution_ids":["https://openalex.org/I157394403"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5001989548"],"corresponding_institution_ids":["https://openalex.org/I157394403"],"apc_list":null,"apc_paid":null,"fwci":0.9085,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.76947053,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"586","last_page":"586"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9678999781608582,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.7234023809432983},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6933107376098633},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.6802991628646851},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6395577788352966},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5395803451538086},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4707917273044586},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.4494004249572754},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.4171084761619568},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.41562992334365845},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.35619163513183594},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33592844009399414},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.1241489052772522}],"concepts":[{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.7234023809432983},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6933107376098633},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.6802991628646851},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6395577788352966},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5395803451538086},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4707917273044586},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.4494004249572754},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.4171084761619568},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.41562992334365845},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35619163513183594},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33592844009399414},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.1241489052772522},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3107411.3108193","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3107411.3108193","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th ACM International Conference on Bioinformatics, Computational Biology,and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.4399999976158142}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2747895175","https://openalex.org/W2614669534","https://openalex.org/W2123014508","https://openalex.org/W2104406636","https://openalex.org/W2741028958","https://openalex.org/W2105512057","https://openalex.org/W4252625449","https://openalex.org/W2115788596","https://openalex.org/W4387784741","https://openalex.org/W3131201603"],"abstract_inverted_index":{"Machine":[0],"Learning":[1],"(ML)-based":[2],"classification":[3,116,132,208,242],"of":[4,37,43,46,86,107,112,139,153,172,202,209,218,246],"protein":[5,38,154,247],"characteristics":[6,155],"from":[7,182],"primary":[8],"sequences":[9],"is":[10,226],"an":[11,70,97],"important":[12],"tool":[13],"for":[14,150,221,244],"exploring":[15],"candidate":[16],"proteins":[17,45,210],"in":[18,134,163],"targeted":[19],"drug":[20],"discovery,":[21],"mutational":[22],"analysis,":[23],"and":[24,35,41,88,127,130,186,192],"functional":[25,126,207],"identification.":[26],"However,":[27],"ML":[28,241],"feature":[29,63,73,223,231],"selection":[30],"requires":[31],"extensive":[32],"manual":[33],"curation":[34],"knowledge":[36,144],"chemistry,":[39],"interactions,":[40],"micro-environment":[42],"the":[44,146,151,190,200,216,236],"interest.":[47],"Current":[48],"approaches":[49,117,159],"include":[50],"amino":[51],"acid":[52],"composition":[53],"strategies,":[54],"specific":[55],"motif":[56],"analysis":[57],"or":[58],"Quantitative":[59],"Structure-Activity":[60],"Relationship":[61],"(QSAR)-based":[62],"generation":[64,74,232],"methods.":[65],"In":[66],"contrast,":[67],"we":[68],"propose":[69],"automated":[71,230],"generalized":[72],"method":[75],"based":[76],"on":[77,141,169,174,189,205],"Natural":[78],"Language":[79],"Processing":[80],"(NLP),":[81],"using":[82,96],"a":[83,103,206,212],"modified":[84],"combination":[85],"N-Gram":[87],"Skip-Gram":[89],"models":[90,123],"(m-NGSG).":[91],"Optimal":[92],"parameters":[93],"are":[94],"selected":[95],"adapted":[98],"grid":[99],"search":[100],"algorithm,":[101],"enabling":[102],"high-throughput":[104],"global":[105],"application":[106],"our":[108],"approach.":[109],"A":[110,166],"meta-comparison":[111],"logistic":[113],"regression":[114],"mediated":[115],"exploiting":[118],"m-NGSG":[119,147,173,203],"with":[120,178],"other":[121],"published":[122],"illustrates":[124],"enhanced":[125],"structural":[128],"binary":[129],"multi-class":[131],"accuracy":[133,188],"every":[135],"instance.":[136],"The":[137,196],"lack":[138],"dependence":[140],"detailed":[142],"physicochemical":[143],"makes":[145],"approach":[148,233],"ideal":[149],"exploration":[152],"recalcitrant":[156],"to":[157,239],"previous":[158],"without":[160,215],"any":[161],"loss":[162],"predictive":[164],"accuracy.":[165],"further":[167],"test":[168,193],"prediction":[170,245],"quality":[171],"cationic":[175],"channel":[176],"blockers":[177],"70%":[179],"sequence":[180],"identity":[181],"Arthropods":[183],"demonstrated":[184],"94.10%":[185],"92.30%":[187],"training":[191],"set,":[194],"respectively.":[195],"latter":[197],"study":[198],"demonstrates":[199],"applicability":[201],"model":[204],"employing":[211],"novel":[213],"dataset.Thus,":[214],"requirement":[217],"expert":[219],"intervention":[220],"optimal":[222],"selection,":[224],"it":[225],"hoped":[227],"that":[228],"this":[229],"will":[234],"reduce":[235],"time":[237],"needed":[238],"employ":[240],"strategies":[243],"characteristics.":[248]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
