{"id":"https://openalex.org/W1564129409","doi":"https://doi.org/10.1109/csb.2003.1227432","title":"Refining the extraction of relevant documents from biomedical literature to create a corpus for pathway text mining","display_name":"Refining the extraction of relevant documents from biomedical literature to create a corpus for pathway text mining","publication_year":2004,"publication_date":"2004-03-30","ids":{"openalex":"https://openalex.org/W1564129409","doi":"https://doi.org/10.1109/csb.2003.1227432","mag":"1564129409"},"language":"en","primary_location":{"id":"doi:10.1109/csb.2003.1227432","is_oa":false,"landing_page_url":"https://doi.org/10.1109/csb.2003.1227432","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Systems Bioinformatics. CSB2003. Proceedings of the 2003 IEEE Bioinformatics Conference. CSB2003","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015411167","display_name":"H. Harte","orcid":null},"institutions":[{"id":"https://openalex.org/I1296342797","display_name":"Pharmaceutical Product Development (United States)","ror":"https://ror.org/01sjx9496","country_code":"US","type":"company","lineage":["https://openalex.org/I1296342797"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"H. Harte","raw_affiliation_strings":["PPD Discovery, Inc., Menlo Park, CA, USA","PPD Discovery, Inc., Mento Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"PPD Discovery, Inc., Menlo Park, CA, USA","institution_ids":["https://openalex.org/I1296342797"]},{"raw_affiliation_string":"PPD Discovery, Inc., Mento Park, CA, USA","institution_ids":["https://openalex.org/I1296342797"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059027378","display_name":"Yijuan Lu","orcid":"https://orcid.org/0000-0002-9855-8365"},"institutions":[{"id":"https://openalex.org/I1296342797","display_name":"Pharmaceutical Product Development (United States)","ror":"https://ror.org/01sjx9496","country_code":"US","type":"company","lineage":["https://openalex.org/I1296342797"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Y. Lu","raw_affiliation_strings":["PPD Discovery, Inc., Menlo Park, CA, USA","PPD Discovery, Inc., Mento Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"PPD Discovery, Inc., Menlo Park, CA, USA","institution_ids":["https://openalex.org/I1296342797"]},{"raw_affiliation_string":"PPD Discovery, Inc., Mento Park, CA, USA","institution_ids":["https://openalex.org/I1296342797"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109923680","display_name":"S. B. Osborn","orcid":null},"institutions":[{"id":"https://openalex.org/I1296342797","display_name":"Pharmaceutical Product Development (United States)","ror":"https://ror.org/01sjx9496","country_code":"US","type":"company","lineage":["https://openalex.org/I1296342797"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"S. Osborn","raw_affiliation_strings":["PPD Discovery, Inc., Menlo Park, CA, USA","PPD Discovery, Inc., Mento Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"PPD Discovery, Inc., Menlo Park, CA, USA","institution_ids":["https://openalex.org/I1296342797"]},{"raw_affiliation_string":"PPD Discovery, Inc., Mento Park, CA, USA","institution_ids":["https://openalex.org/I1296342797"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055761904","display_name":"D. Dehoney","orcid":null},"institutions":[{"id":"https://openalex.org/I1296342797","display_name":"Pharmaceutical Product Development (United States)","ror":"https://ror.org/01sjx9496","country_code":"US","type":"company","lineage":["https://openalex.org/I1296342797"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"D. Dehoney","raw_affiliation_strings":["PPD Discovery, Inc., Menlo Park, CA, USA","PPD Discovery, Inc., Mento Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"PPD Discovery, Inc., Menlo Park, CA, USA","institution_ids":["https://openalex.org/I1296342797"]},{"raw_affiliation_string":"PPD Discovery, Inc., Mento Park, CA, USA","institution_ids":["https://openalex.org/I1296342797"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076088202","display_name":"Daniel J. Chin","orcid":"https://orcid.org/0000-0002-2266-9501"},"institutions":[{"id":"https://openalex.org/I1296342797","display_name":"Pharmaceutical Product Development (United States)","ror":"https://ror.org/01sjx9496","country_code":"US","type":"company","lineage":["https://openalex.org/I1296342797"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"D. Chin","raw_affiliation_strings":["PPD Discovery, Inc., Menlo Park, CA, USA","PPD Discovery, Inc., Mento Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"PPD Discovery, Inc., Menlo Park, CA, USA","institution_ids":["https://openalex.org/I1296342797"]},{"raw_affiliation_string":"PPD Discovery, Inc., Mento Park, CA, USA","institution_ids":["https://openalex.org/I1296342797"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5015411167"],"corresponding_institution_ids":["https://openalex.org/I1296342797"],"apc_list":null,"apc_paid":null,"fwci":0.1169,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.45032979,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"644","last_page":"645"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.965499997138977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8021234273910522},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.77128005027771},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5322558283805847},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.5104297995567322},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4765249490737915},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4678400754928589},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.4458662271499634},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.44201144576072693},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4214929938316345},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3474523425102234},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.11098629236221313}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8021234273910522},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.77128005027771},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5322558283805847},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.5104297995567322},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4765249490737915},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4678400754928589},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.4458662271499634},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.44201144576072693},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4214929938316345},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3474523425102234},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.11098629236221313},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/csb.2003.1227432","is_oa":false,"landing_page_url":"https://doi.org/10.1109/csb.2003.1227432","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Systems Bioinformatics. CSB2003. Proceedings of the 2003 IEEE Bioinformatics Conference. CSB2003","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W4508078","https://openalex.org/W2103017472","https://openalex.org/W2143349571","https://openalex.org/W2152183901","https://openalex.org/W6681291824"],"related_works":["https://openalex.org/W1557094818","https://openalex.org/W3024364549","https://openalex.org/W4206019083","https://openalex.org/W2048865712","https://openalex.org/W2054476758","https://openalex.org/W1976265003","https://openalex.org/W2370378377","https://openalex.org/W2183246718","https://openalex.org/W2099261052","https://openalex.org/W4237510188"],"abstract_inverted_index":{"For":[0],"biologists":[1],"to":[2,16,48,57,104,159,168,182,193],"keep":[3],"up":[4],"with":[5,96,232],"developments":[6],"in":[7,100,174,177,216],"their":[8,32,140,195],"field":[9],"or":[10,29,40],"related":[11,151],"fields,":[12],"automation":[13],"is":[14],"desirable":[15],"more":[17],"efficiently":[18],"read":[19],"and":[20,31,55,79,137,213],"interpret":[21],"a":[22,64,101,114,175,227],"rapidly":[23],"growing":[24],"literature.":[25,45],"Identification":[26],"of":[27,38,66,70,156,162,180,197,201,210,230],"proteins":[28,69],"genes":[30],"interactions":[33],"can":[34],"facilitate":[35],"the":[36,44,80,92,147,186,202,208,217],"mapping":[37],"canonical":[39],"evolving":[41],"pathways":[42],"from":[43,77],"In":[46],"order":[47],"mine":[49],"such":[50],"data,":[51],"we":[52],"developed":[53],"procedures":[54],"tools":[56],"pre-qualify":[58],"documents":[59,67,190,205],"for":[60,68,127,220,237],"further":[61],"analysis.":[62],"Initially,":[63],"corpus":[65,178],"interest":[71],"was":[72,89,111,166],"built":[73],"using":[74,91,113,133,226,240],"alternate":[75],"symbols":[76],"Locuslink":[78],"Stanford":[81],"SOURCE":[82],"as":[83],"MEDLINE":[84],"search":[85],"terms.":[86],"The":[87,108,188],"query":[88,103],"refined":[90],"optimum":[93],"keywords":[94],"together":[95],"MeSH":[97],"terms":[98,233],"combined":[99],"Boolean":[102],"minimize":[105],"false":[106,211],"positives.":[107],"document":[109,143],"space":[110],"examined":[112,192],"strategy":[115],"employing;":[116],"latent":[117],"semantic":[118],"indexing":[119],"(LSI),":[120],"which":[121,172],"uses":[122],"Entrez's":[123],"\"related":[124],"papers\"":[125],"utility":[126],"MEDLINE.":[128],"Documents'":[129],"relationships":[130],"were":[131,191,224],"visualized":[132],"an":[134],"undirected":[135],"graph":[136],"scored":[138],"by":[139,146],"relatedness.":[141],"Distinct":[142],"clusters,":[144],"formed":[145],"most":[148,203],"highly":[149],"connected":[150],"papers,":[152],"are":[153],"mostly":[154],"composed":[155],"abstracts":[157],"relating":[158],"one":[160],"aspect":[161],"research.":[163],"This":[164],"feature":[165],"used":[167],"filter":[169],"irrelevant":[170,214],"abstracts,":[171],"resulted":[173],"reduction":[176],"size":[179],"10%":[181],"30%":[183],"depending":[184],"on":[185,235],"domain.":[187],"excluded":[189],"confirm":[194],"lack":[196],"relevance.":[198],"Corpora":[199],"consisted":[200],"relevant":[204],"thus":[206],"reducing":[207],"number":[209],"positives":[212],"examples":[215],"training":[218],"set":[219],"pathway":[221],"mapping.":[222],"Documents":[223],"tagged,":[225],"modified":[228],"version":[229],"GATE2,":[231],"based":[234],"GO":[236],"rule":[238],"induction":[239],"RAPIER.":[241]},"counts_by_year":[{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
