{"id":"https://openalex.org/W2140364813","doi":"https://doi.org/10.1093/bioinformatics/bts042","title":"Literature mining of host\u2013pathogen interactions: comparing feature-based supervised learning and language-based approaches","display_name":"Literature mining of host\u2013pathogen interactions: comparing feature-based supervised learning and language-based approaches","publication_year":2012,"publication_date":"2012-01-27","ids":{"openalex":"https://openalex.org/W2140364813","doi":"https://doi.org/10.1093/bioinformatics/bts042","mag":"2140364813","pmid":"https://pubmed.ncbi.nlm.nih.gov/22285561"},"language":"en","primary_location":{"id":"doi:10.1093/bioinformatics/bts042","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/bts042","pdf_url":null,"source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010978700","display_name":"Thanh Thieu","orcid":"https://orcid.org/0000-0002-4926-9292"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Thanh Thieu","raw_affiliation_strings":["1 Department of Computer Science, 2MU Informatics Institute and 3Bond Life Science Center, University of Missouri, Columbia, MO 65211, USA"],"affiliations":[{"raw_affiliation_string":"1 Department of Computer Science, 2MU Informatics Institute and 3Bond Life Science Center, University of Missouri, Columbia, MO 65211, USA","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033769407","display_name":"Shashank Joshi","orcid":"https://orcid.org/0000-0002-0990-5821"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sneha Joshi","raw_affiliation_strings":["1 Department of Computer Science, 2MU Informatics Institute and 3Bond Life Science Center, University of Missouri, Columbia, MO 65211, USA"],"affiliations":[{"raw_affiliation_string":"1 Department of Computer Science, 2MU Informatics Institute and 3Bond Life Science Center, University of Missouri, Columbia, MO 65211, USA","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042128403","display_name":"Samantha Warren","orcid":null},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samantha Warren","raw_affiliation_strings":["1 Department of Computer Science, 2MU Informatics Institute and 3Bond Life Science Center, University of Missouri, Columbia, MO 65211, USA"],"affiliations":[{"raw_affiliation_string":"1 Department of Computer Science, 2MU Informatics Institute and 3Bond Life Science Center, University of Missouri, Columbia, MO 65211, USA","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022192689","display_name":"Dmitry Korkin","orcid":"https://orcid.org/0000-0002-3875-9085"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dmitry Korkin","raw_affiliation_strings":["1 Department of Computer Science, 2MU Informatics Institute and 3Bond Life Science Center, University of Missouri, Columbia, MO 65211, USA"],"affiliations":[{"raw_affiliation_string":"1 Department of Computer Science, 2MU Informatics Institute and 3Bond Life Science Center, University of Missouri, Columbia, MO 65211, USA","institution_ids":["https://openalex.org/I76835614"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5010978700"],"corresponding_institution_ids":["https://openalex.org/I76835614"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618},"apc_paid":null,"fwci":1.8387,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.85335295,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"28","issue":"6","first_page":"867","last_page":"875"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7809882164001465},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.6385617256164551},{"id":"https://openalex.org/keywords/host","display_name":"Host (biology)","score":0.5736151933670044},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5698440074920654},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5550362467765808},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.5240421891212463},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5084168314933777},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.4870589077472687},{"id":"https://openalex.org/keywords/biomedical-text-mining","display_name":"Biomedical text mining","score":0.4697361886501312},{"id":"https://openalex.org/keywords/organism","display_name":"Organism","score":0.4266442060470581},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3775637745857239},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3560105562210083},{"id":"https://openalex.org/keywords/text-mining","display_name":"Text mining","score":0.19403308629989624},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.12236610054969788}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7809882164001465},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.6385617256164551},{"id":"https://openalex.org/C126831891","wikidata":"https://www.wikidata.org/wiki/Q221673","display_name":"Host (biology)","level":2,"score":0.5736151933670044},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5698440074920654},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5550362467765808},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.5240421891212463},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5084168314933777},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.4870589077472687},{"id":"https://openalex.org/C165141518","wikidata":"https://www.wikidata.org/wiki/Q4915126","display_name":"Biomedical text mining","level":3,"score":0.4697361886501312},{"id":"https://openalex.org/C137858568","wikidata":"https://www.wikidata.org/wiki/Q7239","display_name":"Organism","level":2,"score":0.4266442060470581},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3775637745857239},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3560105562210083},{"id":"https://openalex.org/C71472368","wikidata":"https://www.wikidata.org/wiki/Q676880","display_name":"Text mining","level":2,"score":0.19403308629989624},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.12236610054969788},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000818","descriptor_name":"Animals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007239","descriptor_name":"Infections","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D007239","descriptor_name":"Infections","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D007239","descriptor_name":"Infections","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D011506","descriptor_name":"Proteins","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D039781","descriptor_name":"PubMed","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D039781","descriptor_name":"PubMed","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D039781","descriptor_name":"PubMed","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D054884","descriptor_name":"Host-Pathogen Interactions","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D054884","descriptor_name":"Host-Pathogen Interactions","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D054884","descriptor_name":"Host-Pathogen Interactions","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D060388","descriptor_name":"Support Vector Machine","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D060388","descriptor_name":"Support Vector Machine","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D060388","descriptor_name":"Support Vector Machine","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1093/bioinformatics/bts042","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/bts042","pdf_url":null,"source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},{"id":"pmid:22285561","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/22285561","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics (Oxford, England)","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7699999809265137}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":67,"referenced_works":["https://openalex.org/W138910040","https://openalex.org/W271111964","https://openalex.org/W1502700877","https://openalex.org/W1537662695","https://openalex.org/W1567787856","https://openalex.org/W1979601325","https://openalex.org/W1980127292","https://openalex.org/W1981805898","https://openalex.org/W1986245516","https://openalex.org/W1987146951","https://openalex.org/W2013286714","https://openalex.org/W2032243072","https://openalex.org/W2036565532","https://openalex.org/W2037442572","https://openalex.org/W2038721957","https://openalex.org/W2047990563","https://openalex.org/W2048140075","https://openalex.org/W2051689038","https://openalex.org/W2059325738","https://openalex.org/W2059655508","https://openalex.org/W2064030835","https://openalex.org/W2081771431","https://openalex.org/W2090248239","https://openalex.org/W2096525273","https://openalex.org/W2097960255","https://openalex.org/W2100592161","https://openalex.org/W2101727078","https://openalex.org/W2101730352","https://openalex.org/W2102770365","https://openalex.org/W2104768328","https://openalex.org/W2105692091","https://openalex.org/W2115048111","https://openalex.org/W2122305935","https://openalex.org/W2122904379","https://openalex.org/W2123939104","https://openalex.org/W2124617249","https://openalex.org/W2126276057","https://openalex.org/W2126469373","https://openalex.org/W2130687290","https://openalex.org/W2136992683","https://openalex.org/W2139119391","https://openalex.org/W2139259976","https://openalex.org/W2147818576","https://openalex.org/W2148395543","https://openalex.org/W2148603752","https://openalex.org/W2148853951","https://openalex.org/W2158505321","https://openalex.org/W2159126118","https://openalex.org/W2161062388","https://openalex.org/W2163980860","https://openalex.org/W2164620224","https://openalex.org/W2166057395","https://openalex.org/W2169250301","https://openalex.org/W2914853499","https://openalex.org/W2916767477","https://openalex.org/W2917375902","https://openalex.org/W3007616136","https://openalex.org/W3089319657","https://openalex.org/W4235505822","https://openalex.org/W4236324471","https://openalex.org/W6610021177","https://openalex.org/W6634123513","https://openalex.org/W6646967066","https://openalex.org/W6647258397","https://openalex.org/W6673154977","https://openalex.org/W6783763832","https://openalex.org/W7071374342"],"related_works":["https://openalex.org/W24843447","https://openalex.org/W94193318","https://openalex.org/W2073808674","https://openalex.org/W322176015","https://openalex.org/W4287688258","https://openalex.org/W3049211950","https://openalex.org/W2539274330","https://openalex.org/W161645463","https://openalex.org/W2135912801","https://openalex.org/W3013434114"],"abstract_inverted_index":{"Here,":[0],"we":[1],"introduce":[2],"and":[3,23,29,70,86,112,139],"compare":[4],"two":[5],"new":[6],"approaches":[7,108,135],"to":[8,121],"automatically":[9],"detect":[10],"whether":[11],"the":[12,25,33,55,59,67,104,127,142,155],"title":[13],"or":[14,73],"abstract":[15],"of":[16,66],"a":[17,39,94,122],"PubMed":[18],"publication":[19],"contains":[20],"HPI":[21,117],"data,":[22],"extract":[24],"information":[26],"about":[27],"organisms":[28,69],"proteins":[30,72],"involved":[31],"in":[32,141],"interaction.":[34],"The":[35,49,90,107,145],"first":[36],"approach":[37,124,149],"is":[38],"feature-based":[40],"supervised":[41],"learning":[42],"method":[43,92],"using":[44],"support":[45],"vector":[46],"machines":[47],"(SVMs).":[48],"SVM":[50],"models":[51],"are":[52],"trained":[53,111],"on":[54,114,126,154],"features":[56,63],"derived":[57,102],"from":[58,103],"individual":[60],"sentences.":[61],"These":[62],"include":[64],"names":[65],"host/pathogen":[68],"corresponding":[71],"genes,":[74],"keywords":[75],"describing":[76],"HPI-specific":[77],"information,":[78,83],"more":[79],"general":[80],"protein-protein":[81,129],"interaction":[82,130],"experimental":[84],"methods":[85],"other":[87],"statistical":[88],"information.":[89],"language-based":[91],"employed":[93],"link":[95],"grammar":[96],"parser":[97],"combined":[98],"with":[99],"semantic":[100],"patterns":[101],"training":[105],"examples.":[106],"have":[109],"been":[110],"tested":[113],"manually":[115],"curated":[116],"data.":[118],"When":[119],"compared":[120],"na\u00efve":[123],"based":[125],"existing":[128],"literature":[131],"mining":[132],"method,":[133],"our":[134],"demonstrated":[136],"higher":[137],"accuracy":[138],"recall":[140],"classification":[143],"task.":[144],"most":[146],"accurate,":[147],"feature-based,":[148],"achieved":[150],"66-73%":[151],"accuracy,":[152],"depending":[153],"test":[156],"protocol.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":5},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
