{"id":"https://openalex.org/W3004838227","doi":"https://doi.org/10.1109/access.2020.2972205","title":"CREGEX: A Biomedical Text Classifier Based on Automatically Generated Regular Expressions","display_name":"CREGEX: A Biomedical Text Classifier Based on Automatically Generated Regular Expressions","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3004838227","doi":"https://doi.org/10.1109/access.2020.2972205","mag":"3004838227"},"language":"en","primary_location":{"id":"doi:10.1109/access.2020.2972205","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.2972205","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/08985344.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/08985344.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084370844","display_name":"Christopher A. Flores","orcid":"https://orcid.org/0000-0003-0994-5919"},"institutions":[{"id":"https://openalex.org/I172787465","display_name":"University of Concepci\u00f3n","ror":"https://ror.org/0460jpj73","country_code":"CL","type":"education","lineage":["https://openalex.org/I172787465"]}],"countries":["CL"],"is_corresponding":true,"raw_author_name":"Christopher A. Flores","raw_affiliation_strings":["Electrical Engineering Department, Universidad de Concepci\u00f3n, Concepci\u00f3n, Chile"],"affiliations":[{"raw_affiliation_string":"Electrical Engineering Department, Universidad de Concepci\u00f3n, Concepci\u00f3n, Chile","institution_ids":["https://openalex.org/I172787465"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091073936","display_name":"Rosa L. Figueroa","orcid":"https://orcid.org/0000-0002-2021-0203"},"institutions":[{"id":"https://openalex.org/I172787465","display_name":"University of Concepci\u00f3n","ror":"https://ror.org/0460jpj73","country_code":"CL","type":"education","lineage":["https://openalex.org/I172787465"]}],"countries":["CL"],"is_corresponding":false,"raw_author_name":"Rosa L. Figueroa","raw_affiliation_strings":["Electrical Engineering Department, Universidad de Concepci\u00f3n, Concepci\u00f3n, Chile"],"affiliations":[{"raw_affiliation_string":"Electrical Engineering Department, Universidad de Concepci\u00f3n, Concepci\u00f3n, Chile","institution_ids":["https://openalex.org/I172787465"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034094232","display_name":"Jorge E. Pezoa","orcid":"https://orcid.org/0000-0002-5225-5334"},"institutions":[{"id":"https://openalex.org/I172787465","display_name":"University of Concepci\u00f3n","ror":"https://ror.org/0460jpj73","country_code":"CL","type":"education","lineage":["https://openalex.org/I172787465"]}],"countries":["CL"],"is_corresponding":false,"raw_author_name":"Jorge E. Pezoa","raw_affiliation_strings":["Electrical Engineering Department, Universidad de Concepci\u00f3n, Concepci\u00f3n, Chile"],"affiliations":[{"raw_affiliation_string":"Electrical Engineering Department, Universidad de Concepci\u00f3n, Concepci\u00f3n, Chile","institution_ids":["https://openalex.org/I172787465"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058747294","display_name":"Qing Zeng\u2010Treitler","orcid":"https://orcid.org/0000-0002-8353-7473"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qing Zeng-Treitler","raw_affiliation_strings":["Biomedical Informatics Center, George Washington University, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Biomedical Informatics Center, George Washington University, Washington, USA","institution_ids":["https://openalex.org/I193531525"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5084370844"],"corresponding_institution_ids":["https://openalex.org/I172787465"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.7522,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.68741923,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":"8","issue":null,"first_page":"29270","last_page":"29280"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7197704315185547},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5858899354934692},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5661239624023438},{"id":"https://openalex.org/keywords/regular-expression","display_name":"Regular expression","score":0.5216565132141113},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4428718090057373},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1228158175945282}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7197704315185547},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5858899354934692},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5661239624023438},{"id":"https://openalex.org/C121329065","wikidata":"https://www.wikidata.org/wiki/Q185612","display_name":"Regular expression","level":2,"score":0.5216565132141113},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4428718090057373},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1228158175945282}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2020.2972205","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.2972205","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/08985344.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:ec7b9e1fca964e9eba4b39c1c3781981","is_oa":true,"landing_page_url":"https://doaj.org/article/ec7b9e1fca964e9eba4b39c1c3781981","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 8, Pp 29270-29280 (2020)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2020.2972205","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2020.2972205","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8948470/08985344.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.46000000834465027}],"awards":[{"id":"https://openalex.org/G18725684","display_name":null,"funder_award_id":"16CTTS6639","funder_id":"https://openalex.org/F4320311278","funder_display_name":"Corporaci\u00f3n de Fomento de la Producci\u00f3n"},{"id":"https://openalex.org/G415940621","display_name":null,"funder_award_id":"UCO 1866","funder_id":"https://openalex.org/F4320323717","funder_display_name":"Universidad de Concepci\u00f3n"},{"id":"https://openalex.org/G5594623709","display_name":null,"funder_award_id":"2017-21172062 Scholarship","funder_id":"https://openalex.org/F4320334812","funder_display_name":"Comisi\u00f3n Nacional de Investigaci\u00f3n Cient\u00edfica y Tecnol\u00f3gica"},{"id":"https://openalex.org/G5687599572","display_name":null,"funder_award_id":"ID19I10120","funder_id":"https://openalex.org/F4320334812","funder_display_name":"Comisi\u00f3n Nacional de Investigaci\u00f3n Cient\u00edfica y Tecnol\u00f3gica"},{"id":"https://openalex.org/G6010341734","display_name":null,"funder_award_id":"16CTTS66390","funder_id":"https://openalex.org/F4320311278","funder_display_name":"Corporaci\u00f3n de Fomento de la Producci\u00f3n"},{"id":"https://openalex.org/G6362086579","display_name":null,"funder_award_id":"ID19I10120","funder_id":"https://openalex.org/F4320338106","funder_display_name":"Fondo de Fomento al Desarrollo Cient\u00edfico y Tecnol\u00f3gico"},{"id":"https://openalex.org/G8578748274","display_name":null,"funder_award_id":"217.092.052-1.0","funder_id":"https://openalex.org/F4320323717","funder_display_name":"Universidad de Concepci\u00f3n"}],"funders":[{"id":"https://openalex.org/F4320311278","display_name":"Corporaci\u00f3n de Fomento de la Producci\u00f3n","ror":"https://ror.org/00as45r68"},{"id":"https://openalex.org/F4320323717","display_name":"Universidad de Concepci\u00f3n","ror":"https://ror.org/0460jpj73"},{"id":"https://openalex.org/F4320334812","display_name":"Comisi\u00f3n Nacional de Investigaci\u00f3n Cient\u00edfica y Tecnol\u00f3gica","ror":"https://ror.org/02ap3w078"},{"id":"https://openalex.org/F4320338106","display_name":"Fondo de Fomento al Desarrollo Cient\u00edfico y Tecnol\u00f3gico","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3004838227.pdf","grobid_xml":"https://content.openalex.org/works/W3004838227.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W72959484","https://openalex.org/W95287305","https://openalex.org/W96276655","https://openalex.org/W127410472","https://openalex.org/W1570448133","https://openalex.org/W1616853180","https://openalex.org/W1775135849","https://openalex.org/W2002661695","https://openalex.org/W2022645200","https://openalex.org/W2026213144","https://openalex.org/W2038941723","https://openalex.org/W2059383863","https://openalex.org/W2068541969","https://openalex.org/W2077123864","https://openalex.org/W2077482713","https://openalex.org/W2093780939","https://openalex.org/W2094390624","https://openalex.org/W2106950427","https://openalex.org/W2113730959","https://openalex.org/W2120538351","https://openalex.org/W2125800352","https://openalex.org/W2258733579","https://openalex.org/W2260037449","https://openalex.org/W2322191948","https://openalex.org/W2469025734","https://openalex.org/W2510356291","https://openalex.org/W2524624025","https://openalex.org/W2543215135","https://openalex.org/W2583704237","https://openalex.org/W2754384735","https://openalex.org/W2783380896","https://openalex.org/W2790730965","https://openalex.org/W2800318991","https://openalex.org/W2887312980","https://openalex.org/W2897065501","https://openalex.org/W2913991821","https://openalex.org/W2922478741","https://openalex.org/W2938304012","https://openalex.org/W2951477402","https://openalex.org/W2959598844","https://openalex.org/W2964992537","https://openalex.org/W2979602634","https://openalex.org/W3100635282","https://openalex.org/W3103864159","https://openalex.org/W6603854148","https://openalex.org/W6605220023","https://openalex.org/W6727590805","https://openalex.org/W6732540511","https://openalex.org/W6753743562"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2563334590","https://openalex.org/W2390279801","https://openalex.org/W2807616588","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W3204019825"],"abstract_inverted_index":{"High":[0],"accuracy":[1,213],"text":[2,52,94,246],"classifiers":[3,24,178,209],"are":[4,85],"used":[5,159,197,220],"nowadays":[6],"in":[7,105,147,193,210],"organizing":[8],"large":[9,121],"amounts":[10],"of":[11,103,107,115,123,165,185,212,224],"biomedical":[12,51],"information":[13,150],"and":[14,71,79,109,112,134,155,173,207,214],"supporting":[15],"clinical":[16],"decision-making":[17],"processes.":[18],"In":[19],"medical":[20],"informatics,":[21],"regular":[22,240],"expression-based":[23],"have":[25],"emerged":[26],"as":[27,188],"an":[28,56,64,69],"alternative":[29],"to":[30,36,39,140,161,227,238,243],"traditional,":[31],"discriminative":[32,72],"classification":[33],"algorithms":[34],"due":[35],"their":[37],"ability":[38,242],"model":[40],"sequential":[41],"patterns.":[42,247],"This":[43],"article":[44],"presents":[45],"CREGEX":[46,126,200],"(Classifier":[47],"Regular":[48,83],"Expression),":[49],"a":[50,92,116,120,137,221,233],"classifier":[53],"based":[54],"on":[55,151],"automatically":[57,67,86],"generated":[58,87],"regular-expressions-based":[59,73],"feature":[60,74,117,129],"space.":[61],"We":[62],"conceived":[63],"algorithm":[65],"for":[66,77,198],"constructing":[68],"informative":[70],"space,":[75],"suitable":[76],"binary":[78],"multiclass":[80],"discrimination":[81],"problems.":[82],"expressions":[84],"from":[88],"training":[89,225],"texts":[90],"using":[91],"coarse-to-fine":[93],"aligning":[95],"method,":[96],"which":[97],"trades":[98],"off":[99],"the":[100,113,163,195,205,229,239],"lexical":[101],"variants":[102],"words,":[104],"terms":[106,211],"gender":[108],"grammatical":[110],"number,":[111],"generation":[114],"space":[118],"containing":[119],"number":[122],"noisy":[124],"features.":[125,189],"carries":[127],"out":[128],"selection":[130],"by":[131],"filtering":[132],"keywords":[133],"also":[135,180,219],"computes":[136],"confidence":[138],"metric":[139],"classify":[141],"test":[142],"texts.":[143],"Three":[144],"de-identified":[145],"datasets":[146,196],"Spanish,":[148],"with":[149,182],"smoking":[152],"habits,":[153],"obesity,":[154],"obesity":[156],"types,":[157],"were":[158,179],"here":[160],"assess":[162],"performance":[164,235],"CREGEX.":[166],"For":[167],"comparison,":[168],"Support":[169],"Vector":[170],"Machine":[171],"(SVM)":[172],"Na\u00efve":[174],"Bayes":[175],"(NB)":[176],"supervised":[177],"trained":[181],"consecutive":[183],"sequences":[184],"tokens":[186],"(n-grams)":[187],"Results":[190],"show":[191],"that,":[192],"all":[194],"evaluation,":[199],"not":[201],"only":[202],"outperformed":[203],"both":[204],"SVM":[206],"NB":[208],"F-measure":[215],"(p-value<;":[216],"0.05)":[217],"but":[218],"fewer":[222],"amount":[223],"examples":[226],"achieve":[228],"same":[230],"performance.":[231],"Such":[232],"superior":[234],"is":[236],"attributed":[237],"expressions'":[241],"represent":[244],"complex":[245]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
