{"id":"https://openalex.org/W2014849717","doi":"https://doi.org/10.1371/journal.pcbi.0040020","title":"Getting Started in Text Mining","display_name":"Getting Started in Text Mining","publication_year":2008,"publication_date":"2008-01-01","ids":{"openalex":"https://openalex.org/W2014849717","doi":"https://doi.org/10.1371/journal.pcbi.0040020","mag":"2014849717","pmid":"https://pubmed.ncbi.nlm.nih.gov/18225946"},"language":"en","primary_location":{"id":"doi:10.1371/journal.pcbi.0040020","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.0040020","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.0040020&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS Computational Biology","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.0040020&type=printable","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062221063","display_name":"Kevin Bretonnel Cohen","orcid":"https://orcid.org/0000-0003-1749-8290"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"K. Bretonnel Cohen","raw_affiliation_strings":["University of Colorado School of Medicine, Center for Computational Pharmacology, UCHSC at Fitzsimons, Department of Pharmacology, Aurora, Colorado, United States of America. kevin.cohen@gmail.com","University of Colorado School of Medicine, Center for Computational Pharmacology, UCHSC at Fitzsimons, Department of Pharmacology, Aurora, Colorado, United States of America"],"affiliations":[{"raw_affiliation_string":"University of Colorado School of Medicine, Center for Computational Pharmacology, UCHSC at Fitzsimons, Department of Pharmacology, Aurora, Colorado, United States of America. kevin.cohen@gmail.com","institution_ids":[]},{"raw_affiliation_string":"University of Colorado School of Medicine, Center for Computational Pharmacology, UCHSC at Fitzsimons, Department of Pharmacology, Aurora, Colorado, United States of America","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041860080","display_name":"Lawrence Hunter","orcid":"https://orcid.org/0000-0003-1455-3370"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lawrence Hunter","raw_affiliation_strings":["UCHSC, Aurora, Colorado, United States of America","Princeton University, United States of America","University of Colorado School of Medicine, Center for Computational Pharmacology, UCHSC at Fitzsimons, Department of Pharmacology, Aurora, Colorado, United States of America. Lawrence Hunter is with the Center for Computational Pharmacology,"],"affiliations":[{"raw_affiliation_string":"UCHSC, Aurora, Colorado, United States of America","institution_ids":[]},{"raw_affiliation_string":"Princeton University, United States of America","institution_ids":["https://openalex.org/I20089843"]},{"raw_affiliation_string":"University of Colorado School of Medicine, Center for Computational Pharmacology, UCHSC at Fitzsimons, Department of Pharmacology, Aurora, Colorado, United States of America. Lawrence Hunter is with the Center for Computational Pharmacology,","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5062221063"],"corresponding_institution_ids":[],"apc_list":{"value":2655,"currency":"USD","value_usd":2655},"apc_paid":{"value":2655,"currency":"USD","value_usd":2655},"fwci":8.2987,"has_fulltext":true,"cited_by_count":232,"citation_normalized_percentile":{"value":0.98294939,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"4","issue":"1","first_page":"e20","last_page":"e20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13937","display_name":"Genetics, Bioinformatics, and Biomedical Research","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9814000129699707,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/biomedical-text-mining","display_name":"Biomedical text mining","score":0.7446122765541077},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6466785669326782},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.6223719120025635},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.527369499206543},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5152738094329834},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.46067461371421814},{"id":"https://openalex.org/keywords/text-mining","display_name":"Text mining","score":0.3502614498138428},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.28071439266204834}],"concepts":[{"id":"https://openalex.org/C165141518","wikidata":"https://www.wikidata.org/wiki/Q4915126","display_name":"Biomedical text mining","level":3,"score":0.7446122765541077},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6466785669326782},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6223719120025635},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.527369499206543},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5152738094329834},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.46067461371421814},{"id":"https://openalex.org/C71472368","wikidata":"https://www.wikidata.org/wiki/Q676880","display_name":"Text mining","level":2,"score":0.3502614498138428},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28071439266204834},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D010506","descriptor_name":"Periodicals as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D010506","descriptor_name":"Periodicals as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D010506","descriptor_name":"Periodicals as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D010506","descriptor_name":"Periodicals as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016206","descriptor_name":"Databases, Bibliographic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016206","descriptor_name":"Databases, Bibliographic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016206","descriptor_name":"Databases, Bibliographic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016206","descriptor_name":"Databases, Bibliographic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":5,"locations":[{"id":"doi:10.1371/journal.pcbi.0040020","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.0040020","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.0040020&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS Computational Biology","raw_type":"journal-article"},{"id":"pmid:18225946","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/18225946","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS computational biology","raw_type":null},{"id":"pmh:oai:doaj.org/article:97f1d884095e435086401d27d25c69f5","is_oa":true,"landing_page_url":"https://doaj.org/article/97f1d884095e435086401d27d25c69f5","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Computational Biology, Vol 4, Iss 1, p e20 (2008)","raw_type":"article"},{"id":"pmh:oai:figshare.com:article/153619","is_oa":true,"landing_page_url":"https://figshare.com/articles/Getting_Started_in_Text_Mining/153619","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dataset"},{"id":"pmh:oai:pubmedcentral.nih.gov:2217579","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/2217579","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PLoS Comput Biol","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1371/journal.pcbi.0040020","is_oa":true,"landing_page_url":"https://doi.org/10.1371/journal.pcbi.0040020","pdf_url":"https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.0040020&type=printable","source":{"id":"https://openalex.org/S86033158","display_name":"PLoS Computational Biology","issn_l":"1553-734X","issn":["1553-734X","1553-7358"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315706","host_organization_name":"Public Library of Science","host_organization_lineage":["https://openalex.org/P4310315706"],"host_organization_lineage_names":["Public Library of Science"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"PLoS Computational Biology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2014849717.pdf","grobid_xml":"https://content.openalex.org/works/W2014849717.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W108101778","https://openalex.org/W243513880","https://openalex.org/W626058384","https://openalex.org/W1553529581","https://openalex.org/W1579838312","https://openalex.org/W1593657456","https://openalex.org/W1954715867","https://openalex.org/W1976316416","https://openalex.org/W2152698860","https://openalex.org/W2159203162","https://openalex.org/W2166057395","https://openalex.org/W2188034239","https://openalex.org/W2742727304","https://openalex.org/W4240689716"],"related_works":["https://openalex.org/W3185751515","https://openalex.org/W1479703980","https://openalex.org/W2749535755","https://openalex.org/W1553529581","https://openalex.org/W4206039273","https://openalex.org/W4232119327","https://openalex.org/W2911489562","https://openalex.org/W2134429551","https://openalex.org/W3183808544","https://openalex.org/W2188854577"],"abstract_inverted_index":{"Text":[0,180,498],"mining":[1,30,84,214,234,313,334,343,508,627,782],"is":[2,353,526,551,655,672,708,896,940,951,962,1005,1048,1056,1072,1092,1097,1101,1113,1141],"the":[3,10,17,47,50,66,89,95,122,126,130,134,140,156,172,203,239,254,294,306,380,425,444,457,476,543,563,568,577,589,599,633,664,670,675,679,723,850,860,897,901,921,948,990,996,1017,1052,1063,1073,1111],"use":[4,534,640,685,859],"of":[5,13,36,49,60,76,91,97,104,113,125,139,159,206,230,257,348,398,410,424,427,475,493,521,523,535,538,545,570,579,606,620,632,666,678,696,699,705,722,750,775,808,818,852,862,869,891,899,903,917,923,925,935,960,1021,1024,1076],"automated":[6],"methods":[7,243],"for":[8,27,70,93,109,146,182,188,249,275,610,617,765,772,975,1085],"exploiting":[9],"enormous":[11],"amount":[12,807],"knowledge":[14,547,554,566],"available":[15],"in":[16,46,62,73,88,129,143,155,171,238,253,293,350,379,386,497,562,598,649,668,729,855,930,937,965],"biomedical":[18,82,160,240,311,564,625,780,966],"literature.":[19],"There":[20],"are":[21,34,192,198,320,325,368,384,560,797,847,1135],"at":[22,345,719,726,914,1038],"least":[23,1039],"as":[24,32,263,265,356,365,449,456,471,488,841,867],"many":[25,67,332,412,727],"motivations":[26],"doing":[28],"text":[29,83,161,213,233,312,333,342,352,507,626,781,939],"work":[31],"there":[33],"types":[35,229,522],"bioscientists.":[37,218],"Model":[38],"organism":[39],"database":[40],"curators":[41],"have":[42,80,116,166,208,235,584,714,788,1037],"been":[43,167,209,236,715],"heavy":[44],"participants":[45],"development":[48,90],"field":[51],"due":[52],"to":[53,56,64,86,101,119,202,232,340,395,470,506,623,691,752,757,778,803,810,842,849,927,942,988,992,1008,1015,1059,1127],"their":[54,74,411,789],"need":[55],"process":[57],"large":[58,264,816],"numbers":[59],"publications":[61],"order":[63],"populate":[65],"data":[68],"fields":[69],"every":[71,114,915],"gene":[72,453,943,1041,1055],"species":[75,1053],"interest.":[77],"Bench":[78],"scientists":[79],"built":[81,117,168,210,328,716],"applications":[85,118],"aid":[87],"tools":[92],"interpreting":[94],"output":[96,861],"high-throughput":[98],"assays":[99],"and":[100,138,169,177,184,197,289,305,324,419,443,514,576,588,615,688,725,770,784,791,826,908,945,968,1034,1081,1100,1106,1124,1154],"improve":[102],"searches":[103],"sequence":[105],"databases":[106],"(see":[107,179],"[1]":[108],"a":[110,259,269,284,300,351,357,372,618,635,647,681,693,773,805,835,863,956,1022,1098,1114,1130],"review).":[111],"Bioscientists":[112],"stripe":[115],"deal":[120,376,894],"with":[121,377,495,585,657,895,1103,1107,1118],"dual":[123],"issues":[124,142,483,929],"double-exponential":[127],"growth":[128],"scientific":[131],"literature":[132],"over":[133],"past":[135,173],"few":[136,174,1131],"years":[137],"unique":[141],"searching":[144],"PubMed/MEDLINE":[145],"genomics-related":[147],"publications.":[148],"A":[149,932],"surprising":[150],"phenomenon":[151],"can":[152,583,829,1060],"be":[153,225,341,393,468,596,830,1016,1061,1068],"noted":[154],"recent":[157],"history":[158],"mining:":[162],"although":[163],"several":[164],"systems":[165,314,319,344,525,532,713,736,786,796,813,828,874],"deployed":[170],"years\u2014Chilibot,":[175],"Textpresso,":[176],"PreBIND":[178],"S1":[181],"these":[183],"most":[185],"other":[186,676],"citations),":[187],"example\u2014the":[189],"ones":[190],"that":[191,251,287,382,572,581,711,741,838,888,976],"seeing":[193],"high":[194],"usage":[195],"rates":[196],"making":[199,700],"productive":[200],"contributions":[201],"working":[204],"lives":[205],"bioscientists":[207,573],"not":[211,326,337,845,1067,1093,1136],"by":[212,217,396,408,422,592,738,883,994],"specialists,":[215],"but":[216,261,317,1010],"We":[219],"speculate":[220],"on":[221,744,1149],"why":[222],"this":[223],"might":[224,298,541,595,639,684,858],"so":[226],"below.\r\n\r\nThree":[227],"basic":[228],"approaches":[231,505,622,777],"prevalent":[237],"domain.":[241],"Co-occurrence\u2013based":[242],"do":[244],"no":[245],"more":[246,362,485,500,503,980],"than":[247,981],"look":[248],"concepts":[250,349,383],"occur":[252],"same":[255,295],"unit":[256],"text\u2014typically":[258],"sentence,":[260,296],"sometimes":[262,354],"an":[266,276,611,766,877,953,1082],"abstract\u2014and":[267],"posit":[268],"relationship":[270,301],"between":[271,302,906,1151],"them.":[272],"(See":[273,608,763],"[2]":[274],"early":[277,310,435,612,767],"co-occurrence\u2013based":[278,373],"system.)":[279],"For":[280,389,793,833],"example,":[281,390,643,794,834],"if":[282,985,1051,1138],"such":[283,318],"system":[285,374,638,683,837,892],"saw":[286],"BRCA1":[288,307,391],"breast":[290,303,432,450,460,465,472],"cancer":[291,304,433,451,461,466],"occurred":[292],"it":[297,952],"assume":[299],"gene.":[308],"Some":[309],"were":[315],"co-occurrence\u2013based,":[316],"highly":[321],"error":[322],"prone,":[323],"commonly":[327],"today.":[329],"In":[330,529,731,823],"fact,":[331],"practitioners":[335],"would":[336],"consider":[338],"them":[339,496],"all.":[346],"Co-occurrence":[347],"used":[355],"simple":[358,636],"baseline":[359],"when":[360],"evaluating":[361],"sophisticated":[363,486,686],"systems;":[364],"such,":[366],"they":[367,582,594,846],"nontrivial,":[369],"since":[370],"even":[371,984],"must":[375,893],"variability":[378,482],"ways":[381,492,698],"expressed":[385],"human-produced":[387],"texts.":[388],"could":[392,467],"referred":[394,469,1058],"any":[397,409,423,602,745],"its":[399,428,870],"alternate":[400,1083],"symbols\u2014IRIS,":[401],"PSCP,":[402],"BRCAI,":[403],"BRCC1,":[404],"or":[405,455,478,511,516,601,604,652,734,761,844,910,955,1019,1046,1145],"RNF53":[406],"(or":[407],"spelling":[413],"variants,":[414],"which":[415,593,669],"include":[416],"BRCA1,":[417],"BRCA-1,":[418],"BRCA":[420],"1)\u2014or":[421],"variants":[426],"full":[429,759],"name,":[430,1043],"viz.":[431],"1,":[434,454],"onset":[436],"(its":[437],"official":[438,1044,1074],"name":[439,1018,1128],"per":[440],"Entrez":[441,1077,1086],"Gene":[442,446,1078,1087],"Human":[445],"Nomenclature":[447],"Committee),":[448],"susceptibility":[452,462],"latter's":[458],"variant":[459,590],"gene-1.":[463],"Similarly,":[464],"cancer,":[473],"carcinoma":[474],"breast,":[477],"mammary":[479],"neoplasm.":[480],"These":[481,1133],"challenge":[484],"systems,":[487],"well;":[489],"we":[490,986],"discuss":[491],"coping":[494],"S1.\r\n\r\nTwo":[499],"common":[501,933],"(and":[502,979],"sophisticated)":[504],"exist:":[509],"rule-based":[510,524,531,613,621,637,682,827,864,884],"knowledge-based":[512],"approaches,":[513],"statistical":[515,733,785,825,836,879],"machine-learning-based":[517],"approaches.":[518],"The":[519,1090,1095],"variety":[520],"quite":[527],"wide.":[528],"general,":[530],"make":[533],"some":[536],"sort":[537],"knowledge.":[539],"This":[540,1003],"take":[542,804],"form":[544],"general":[546],"about":[548,555,567,575,663,702],"how":[549,556],"language":[550,907],"structured,":[552],"specific":[553],"biologically":[557],"relevant":[558,848],"facts":[559],"stated":[561],"literature,":[565,600],"sets":[569],"things":[571,667],"talk":[574],"kinds":[578],"relationships":[580,905],"one":[586,630,868,1040],"another,":[587],"forms":[591],"mentioned":[597],"subset":[603],"combination":[605],"these.":[607],"[3]":[609],"system,":[614,769],"[4]":[616,771],"discussion":[619,774],"various":[624,779],"tasks.)":[628],"At":[629,674],"end":[631,677],"spectrum,":[634,680,724],"hard-coded":[641],"patterns\u2014for":[642],"plays":[646],"role":[648],"associated":[656,1102,1117],"\u2014to":[659],"find":[660],"explicit":[661],"statements":[662],"classes":[665,704],"researcher":[671],"interested.":[673],"linguistic":[687,918],"semantic":[689],"analyses":[690],"recognize":[692],"wide":[694],"range":[695],"possible":[697],"assertions":[701],"those":[703],"things.":[706],"It":[707],"worth":[709],"noting":[710],"useful":[712],"using":[717],"technologies":[718],"both":[720],"ends":[721],"points":[728],"between.":[730],"contrast,":[732],"machine-learning\u2013based":[735],"operate":[737,743],"building":[739],"classifiers":[740],"may":[742,1065],"level,":[746],"from":[747,920],"labelling":[748],"part":[749,922,959],"speech":[751,924,961],"choosing":[753],"syntactic":[754],"parse":[755],"trees":[756],"classifying":[758],"sentences":[760],"documents.":[762],"[5]":[764],"learning-based":[768,776],"tasks.)\r\n\r\nRule-based":[783],"each":[787],"advantages":[790],"disadvantages.":[792],"rule":[795],"often":[798],"assumed":[799],"(not":[800],"necessarily":[801],"correctly)":[802],"significant":[806],"time":[809],"develop.":[811],"Statistical":[812],"typically":[814],"require":[815],"amounts":[817],"expensive-to-get":[819],"labelled":[820],"training":[821],"data.":[822],"practice,":[824],"fruitfully":[831],"combined.":[832],"classifies":[839],"documents":[840],"whether":[843],"subject":[851],"genetic":[853,1000],"variation":[854],"mouse":[856],"genes":[857],"mutation":[865],"recognizer":[866],"feature":[871],"extractors.":[872],"Many":[873],"also":[875,1012],"employ":[876],"initial":[878],"processing":[880],"step,":[881],"followed":[882],"post-processing.\r\n\r\nA":[885],"primary":[886],"problem":[887],"either":[889],"type":[890],"issue":[898],"ambiguity:":[900],"existence":[902],"multiple":[904],"meanings":[909],"categories.":[911],"Ambiguity":[912],"exists":[913],"level":[916],"structure,":[919],"words":[926],"subtle":[928],"pragmatics.":[931],"example":[934],"ambiguity":[936,1004,1064],"genomics":[938,993],"related":[941],"names":[944],"symbols.":[946],"Consider":[947],"string":[949],"fat:":[950],"adjective,":[954],"noun?":[957],"Either":[958],"entirely":[963],"plausible":[964],"texts,":[967],"PubMed":[969],"returns":[970],"almost":[971],"112":[972],"K":[973,983],"hits":[974],"single-word":[977],"query":[978,991],"13":[982],"try":[987],"restrict":[989],"including":[995],"disjunction":[997],"(gene":[998],"OR":[999,1001],"genetics).":[1002],"relatively":[1006],"easy":[1007],"resolve,":[1009],"fat":[1011,1071],"turns":[1013],"out":[1014],"symbol":[1020,1075,1084],"number":[1023],"different":[1025],"genes\u2014humans,":[1026],"mice,":[1027],"rats,":[1028],"Drosophila,":[1029],"zebrafish,":[1030],"chickens,":[1031],"M.":[1032],"mulatta,":[1033],"two":[1035],"Lactobacilli":[1036],"whose":[1042,1054],"symbol,":[1045],"alias":[1047],"fat.":[1049],"Even":[1050],"being":[1057],"determined,":[1062],"still":[1066],"resolved\u2014in":[1069],"humans,":[1070],"entry":[1079,1088],"2195":[1080],"948.":[1089],"distinction":[1091],"trivial.":[1094],"former":[1096],"cadhedrin,":[1099],"tumor":[1104],"suppression":[1105],"bipolar":[1108],"disorder,":[1109],"while":[1110],"latter":[1112],"thrombospondin":[1115],"receptor":[1116],"atherosclerosis,":[1119],"platelet":[1120],"glycoprotein":[1121],"deficiency,":[1122],"hyperlipidemia,":[1123],"insulin":[1125],"resistance,":[1126],"just":[1129],"phenotypes.":[1132],"ambiguities":[1134],"trivial:":[1137],"your":[1139],"analysis":[1140],"wrong,":[1142],"you":[1143],"miss":[1144],"erroneously":[1146],"extract":[1147],"information":[1148],"relations":[1150],"molecular":[1152],"biology":[1153],"human":[1155],"disease.":[1156]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":17},{"year":2018,"cited_by_count":10},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":10},{"year":2015,"cited_by_count":17},{"year":2014,"cited_by_count":24},{"year":2013,"cited_by_count":21},{"year":2012,"cited_by_count":13}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
