{"id":"https://openalex.org/W3196395648","doi":"https://doi.org/10.1186/s13040-021-00271-w","title":"Taxonomy-based data representation for data mining: an example of the magnitude of risk associated with H. pylori infection","display_name":"Taxonomy-based data representation for data mining: an example of the magnitude of risk associated with H. pylori infection","publication_year":2021,"publication_date":"2021-08-28","ids":{"openalex":"https://openalex.org/W3196395648","doi":"https://doi.org/10.1186/s13040-021-00271-w","mag":"3196395648","pmid":"https://pubmed.ncbi.nlm.nih.gov/34454568"},"language":"en","primary_location":{"id":"doi:10.1186/s13040-021-00271-w","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-021-00271-w","pdf_url":"https://biodatamining.biomedcentral.com/counter/pdf/10.1186/s13040-021-00271-w","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://biodatamining.biomedcentral.com/counter/pdf/10.1186/s13040-021-00271-w","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091399998","display_name":"Inese Po\u013caka","orcid":"https://orcid.org/0000-0002-9892-7765"},"institutions":[{"id":"https://openalex.org/I91123046","display_name":"University of Latvia","ror":"https://ror.org/05g3mes96","country_code":"LV","type":"education","lineage":["https://openalex.org/I91123046"]}],"countries":["LV"],"is_corresponding":true,"raw_author_name":"Inese Polaka","raw_affiliation_strings":["University of Latvia, Institute of Clinical and Preventive Medicine, Gailezera Street 1, Riga, LV-1079, Latvia. inese.polaka@lu.lv","University of Latvia, Institute of Clinical and Preventive Medicine, Gailezera Street 1, Riga, LV-1079, Latvia"],"raw_orcid":"https://orcid.org/0000-0002-9892-7765","affiliations":[{"raw_affiliation_string":"University of Latvia, Institute of Clinical and Preventive Medicine, Gailezera Street 1, Riga, LV-1079, Latvia. inese.polaka@lu.lv","institution_ids":["https://openalex.org/I91123046"]},{"raw_affiliation_string":"University of Latvia, Institute of Clinical and Preventive Medicine, Gailezera Street 1, Riga, LV-1079, Latvia","institution_ids":["https://openalex.org/I91123046"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023204324","display_name":"Danute Ra\u017euka\u2010Ebela","orcid":"https://orcid.org/0000-0002-3069-0140"},"institutions":[{"id":"https://openalex.org/I91123046","display_name":"University of Latvia","ror":"https://ror.org/05g3mes96","country_code":"LV","type":"education","lineage":["https://openalex.org/I91123046"]}],"countries":["LV"],"is_corresponding":false,"raw_author_name":"Danute Razuka-Ebela","raw_affiliation_strings":["University of Latvia, Institute of Clinical and Preventive Medicine, Gailezera Street 1, Riga, LV-1079, Latvia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Latvia, Institute of Clinical and Preventive Medicine, Gailezera Street 1, Riga, LV-1079, Latvia","institution_ids":["https://openalex.org/I91123046"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100738762","display_name":"Jin Young Park","orcid":"https://orcid.org/0000-0003-2491-5099"},"institutions":[{"id":"https://openalex.org/I42237331","display_name":"Centre international de recherche sur le cancer","ror":"https://ror.org/00v452281","country_code":"FR","type":"government","lineage":["https://openalex.org/I4210105654","https://openalex.org/I42237331"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Jin Young Park","raw_affiliation_strings":["International Agency for Research on Cancer, 150 Cours Albert Thomas, 69372, Lyon, CEDEX 08, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"International Agency for Research on Cancer, 150 Cours Albert Thomas, 69372, Lyon, CEDEX 08, France","institution_ids":["https://openalex.org/I42237331"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022224649","display_name":"M\u0101rcis Leja","orcid":"https://orcid.org/0000-0002-0319-8855"},"institutions":[{"id":"https://openalex.org/I91123046","display_name":"University of Latvia","ror":"https://ror.org/05g3mes96","country_code":"LV","type":"education","lineage":["https://openalex.org/I91123046"]}],"countries":["LV"],"is_corresponding":false,"raw_author_name":"Marcis Leja","raw_affiliation_strings":["Center for Gastric Diseases GASTRO, Gailezera Street 1, Riga, LV-1079, Latvia","University of Latvia, Institute of Clinical and Preventive Medicine, Gailezera Street 1, Riga, LV-1079, Latvia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center for Gastric Diseases GASTRO, Gailezera Street 1, Riga, LV-1079, Latvia","institution_ids":[]},{"raw_affiliation_string":"University of Latvia, Institute of Clinical and Preventive Medicine, Gailezera Street 1, Riga, LV-1079, Latvia","institution_ids":["https://openalex.org/I91123046"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5091399998"],"corresponding_institution_ids":["https://openalex.org/I91123046"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17569467,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":"1","first_page":"43","last_page":"43"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10276","display_name":"Helicobacter pylori-related gastroenterology studies","score":0.08540000021457672,"subfield":{"id":"https://openalex.org/subfields/2746","display_name":"Surgery"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10276","display_name":"Helicobacter pylori-related gastroenterology studies","score":0.08540000021457672,"subfield":{"id":"https://openalex.org/subfields/2746","display_name":"Surgery"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12647","display_name":"Traditional Chinese Medicine Studies","score":0.04399999976158142,"subfield":{"id":"https://openalex.org/subfields/2707","display_name":"Complementary and alternative medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T14374","display_name":"Statistical Methods in Epidemiology","score":0.03999999910593033,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.7442445158958435},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7140461802482605},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.6930527687072754},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.59070885181427},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5516128540039062},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.5250544548034668},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.47232264280319214},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4082143306732178},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4027116298675537},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3536433279514313},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3283616006374359},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.0967215895652771}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.7442445158958435},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7140461802482605},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.6930527687072754},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.59070885181427},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5516128540039062},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.5250544548034668},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.47232264280319214},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4082143306732178},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4027116298675537},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3536433279514313},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3283616006374359},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0967215895652771},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s13040-021-00271-w","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-021-00271-w","pdf_url":"https://biodatamining.biomedcentral.com/counter/pdf/10.1186/s13040-021-00271-w","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},{"id":"pmid:34454568","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34454568","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData mining","raw_type":null},{"id":"pmh:oai:doaj.org/article:0008d08da5ca4cc79c6b0842da405335","is_oa":true,"landing_page_url":"https://doaj.org/article/0008d08da5ca4cc79c6b0842da405335","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BioData Mining, Vol 14, Iss 1, Pp 1-21 (2021)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:8400764","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8400764","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BioData Min","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/s13040-021-00271-w","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-021-00271-w","pdf_url":"https://biodatamining.biomedcentral.com/counter/pdf/10.1186/s13040-021-00271-w","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5958172747","display_name":null,"funder_award_id":"LZP-2018/1-0135","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G6817681629","display_name":null,"funder_award_id":"1.1.1.2/VIAA/2/18/270","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"}],"funders":[{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3196395648.pdf","grobid_xml":"https://content.openalex.org/works/W3196395648.grobid-xml"},"referenced_works_count":40,"referenced_works":["https://openalex.org/W1555273987","https://openalex.org/W1571733964","https://openalex.org/W1594031697","https://openalex.org/W1623603061","https://openalex.org/W1670263352","https://openalex.org/W1974793840","https://openalex.org/W1978364195","https://openalex.org/W2011427563","https://openalex.org/W2011487497","https://openalex.org/W2016381774","https://openalex.org/W2017115884","https://openalex.org/W2019348169","https://openalex.org/W2019731274","https://openalex.org/W2030314525","https://openalex.org/W2037768235","https://openalex.org/W2056884786","https://openalex.org/W2056959138","https://openalex.org/W2113726636","https://openalex.org/W2125055259","https://openalex.org/W2135505005","https://openalex.org/W2140118293","https://openalex.org/W2157768799","https://openalex.org/W2164400088","https://openalex.org/W2166489554","https://openalex.org/W2191661049","https://openalex.org/W2513400162","https://openalex.org/W2515576947","https://openalex.org/W2599264051","https://openalex.org/W2609492761","https://openalex.org/W2743548181","https://openalex.org/W2788150524","https://openalex.org/W2808291487","https://openalex.org/W2911964244","https://openalex.org/W2934981695","https://openalex.org/W2979935139","https://openalex.org/W2999208001","https://openalex.org/W3085162807","https://openalex.org/W3085209558","https://openalex.org/W4297875023","https://openalex.org/W6843735874"],"related_works":["https://openalex.org/W4362597605","https://openalex.org/W1574414179","https://openalex.org/W2905433371","https://openalex.org/W4297676672","https://openalex.org/W3009056573","https://openalex.org/W2922073769","https://openalex.org/W4281702477","https://openalex.org/W2888392564","https://openalex.org/W4310278675","https://openalex.org/W4388422664"],"abstract_inverted_index":{"BACKGROUND:":[0],"The":[1,26,163,207],"amount":[2,316],"of":[3,20,30,113,134,140,147,255,260,284,303,310,317],"available":[4],"and":[5,18,23,119,123,175,186,220,241,273,288,306],"potentially":[6],"significant":[7,169,230],"data":[8,24,32,59,81,85,98,105,248],"describing":[9],"study":[10,97,121,261,311],"subjects":[11,262],"is":[12,50,229],"ever":[13],"growing":[14],"with":[15,173],"the":[16,95,111,132,138,222,226,232,253,258,282,292,297,301,304,315,323],"introduction":[17],"integration":[19],"different":[21,88],"registries":[22],"banks.":[25],"single":[27],"specific":[28,42,205,233,269,290,318],"attribute":[29,194],"these":[31],"are":[33,238],"not":[34],"always":[35,239],"necessary;":[36],"more":[37,187],"often,":[38],"membership":[39,196,267],"to":[40,52,73,78,106,137,177,197,268,291,321],"a":[41,54,198,201,204],"group":[43],"(e.g.":[44],"diet,":[45],"social":[46],"'bubble',":[47],"living":[48],"area)":[49],"enough":[51],"build":[53],"successful":[55],"machine":[56],"learning":[57],"or":[58,160],"mining":[60],"model":[61],"without":[62,295],"overfitting":[63,285],"it.":[64],"Therefore,":[65],"in":[66,110,171,200,225,247,263],"this":[67,108,210],"article":[68],"we":[69],"propose":[70],"an":[71,277],"approach":[72,109,149],"building":[74],"taxonomies":[75,256],"using":[76,142,153,266],"clustering":[77],"replace":[79],"detailed":[80],"from":[82,87],"large":[83],"heterogenous":[84],"sets":[86],"sources,":[89],"while":[90],"improving":[91],"interpretability.":[92],"We":[93,129],"used":[94,246,320],"GISTAR":[96],"base":[99],"that":[100,157,243],"holds":[101],"exhaustive":[102],"self-assessment":[103],"questionnaire":[104],"demonstrate":[107],"task":[112],"differentiating":[114],"between":[115],"H.":[116],"pylori":[117],"positive":[118],"negative":[120],"participants,":[122],"assessing":[124],"their":[125,274],"potential":[126],"risk":[127,283],"factors.":[128],"have":[130],"compared":[131],"results":[133,139,166],"taxonomy-based":[135,164],"classification":[136,141,155,165,180],"raw":[143],"data.":[144],"RESULTS:":[145],"Evaluation":[146],"our":[148],"was":[150],"carried":[151],"out":[152],"6":[154],"algorithms":[156],"induce":[158],"rule-based":[159],"tree-based":[161],"classifiers.":[162],"show":[167],"no":[168],"loss":[170],"information,":[172],"similar":[174],"up":[176],"2.5%":[178],"better":[179],"accuracy.":[181],"Information":[182],"held":[183],"by":[184,192,216,313],"10":[185],"attributes":[188,287],"can":[189,212,280],"be":[190,213,245],"replaced":[191],"one":[193],"demonstrating":[195],"cluster":[199],"hierarchy":[202],"at":[203],"cut.":[206],"clusters":[208],"created":[209],"way":[211],"easily":[214],"interpreted":[215],"researchers":[217],"(doctors,":[218],"epidemiologists)":[219],"describe":[221],"co-occurring":[223],"features":[224,240],"group,":[227],"which":[228],"for":[231,257],"task.":[234],"CONCLUSIONS:":[235],"While":[236],"there":[237],"measurements":[242],"must":[244],"analysis":[249],"as":[250],"they":[251],"are,":[252],"use":[254],"description":[259],"parallel":[264],"allows":[265],"naturally":[270],"occurring":[271],"groups":[272],"impact":[275],"on":[276],"outcome.":[278],"This":[279],"decrease":[281],"(picking":[286],"values":[289],"training":[293],"set":[294],"explaining":[296],"underlying":[298],"conditions),":[299],"improve":[300,307],"accuracy":[302],"models,":[305],"privacy":[308],"protection":[309],"participants":[312],"decreasing":[314],"information":[319],"identify":[322],"individual.":[324]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2025-10-10T00:00:00"}
