{"id":"https://openalex.org/W4390412988","doi":"https://doi.org/10.3233/sw-233491","title":"Wikidata subsetting: Approaches, tools, and evaluation","display_name":"Wikidata subsetting: Approaches, tools, and evaluation","publication_year":2023,"publication_date":"2023-12-27","ids":{"openalex":"https://openalex.org/W4390412988","doi":"https://doi.org/10.3233/sw-233491"},"language":"en","primary_location":{"id":"doi:10.3233/sw-233491","is_oa":true,"landing_page_url":"https://doi.org/10.3233/sw-233491","pdf_url":"https://content.iospress.com:443/download/semantic-web/sw233491?id=semantic-web%2Fsw233491","source":{"id":"https://openalex.org/S4210177235","display_name":"Semantic Web","issn_l":"1570-0844","issn":["1570-0844","2210-4968"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Semantic Web","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://content.iospress.com:443/download/semantic-web/sw233491?id=semantic-web%2Fsw233491","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049179683","display_name":"Seyed Amir Hosseini Beghaeiraveri","orcid":"https://orcid.org/0000-0002-9123-5686"},"institutions":[{"id":"https://openalex.org/I32062511","display_name":"Heriot-Watt University","ror":"https://ror.org/04mghma93","country_code":"GB","type":"education","lineage":["https://openalex.org/I32062511"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Seyed Amir Hosseini Beghaeiraveri","raw_affiliation_strings":["School of Mathematical and Computer Science, Heriot-Watt University, Edinburgh, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mathematical and Computer Science, Heriot-Watt University, Edinburgh, UK","institution_ids":["https://openalex.org/I32062511"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017967454","display_name":"Jos\u00e9 Emilio Labra Gayo","orcid":"https://orcid.org/0000-0001-8907-5348"},"institutions":[{"id":"https://openalex.org/I165339363","display_name":"Universidad de Oviedo","ror":"https://ror.org/006gksa02","country_code":"ES","type":"education","lineage":["https://openalex.org/I165339363"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jose Emilio Labra Gayo","raw_affiliation_strings":["University of Oviedo, Oviedo, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oviedo, Oviedo, Spain","institution_ids":["https://openalex.org/I165339363"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048705669","display_name":"Andra Waagmeester","orcid":"https://orcid.org/0000-0001-9773-4008"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Andra Waagmeester","raw_affiliation_strings":["Micelio, Belgium"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Micelio, Belgium","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066436740","display_name":"Ammar Ammar","orcid":"https://orcid.org/0000-0002-8399-8990"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Ammar Ammar","raw_affiliation_strings":["Dept of Bioinformatics - BiGCaT, NUTRIM, Maastricht University, Netherlads"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept of Bioinformatics - BiGCaT, NUTRIM, Maastricht University, Netherlads","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010678163","display_name":"Carolina Gonz\u00e1lez","orcid":"https://orcid.org/0000-0003-3613-1769"},"institutions":[{"id":"https://openalex.org/I123431417","display_name":"Scripps Research Institute","ror":"https://ror.org/02dxx6824","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I123431417"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carolina Gonzalez","raw_affiliation_strings":["The Scripps Research Institute, US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Scripps Research Institute, US","institution_ids":["https://openalex.org/I123431417"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071903823","display_name":"Denise Slenter","orcid":"https://orcid.org/0000-0001-8449-1318"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Denise Slenter","raw_affiliation_strings":["Dept of Bioinformatics - BiGCaT, NUTRIM, Maastricht University, Netherlads"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept of Bioinformatics - BiGCaT, NUTRIM, Maastricht University, Netherlads","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003179870","display_name":"Sabah Ul-Hasan","orcid":"https://orcid.org/0000-0001-6334-452X"},"institutions":[{"id":"https://openalex.org/I123431417","display_name":"Scripps Research Institute","ror":"https://ror.org/02dxx6824","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I123431417"]},{"id":"https://openalex.org/I4210117952","display_name":"Hologic (United States)","ror":"https://ror.org/02td4ph55","country_code":"US","type":"company","lineage":["https://openalex.org/I4210117952"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sabah Ul-Hasan","raw_affiliation_strings":["Hologic Inc, US","The Scripps Research Institute, US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Hologic Inc, US","institution_ids":["https://openalex.org/I4210117952"]},{"raw_affiliation_string":"The Scripps Research Institute, US","institution_ids":["https://openalex.org/I123431417"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060194743","display_name":"Egon Willighagen","orcid":"https://orcid.org/0000-0001-7542-0286"},"institutions":[{"id":"https://openalex.org/I34352273","display_name":"Maastricht University","ror":"https://ror.org/02jz4aj89","country_code":"NL","type":"education","lineage":["https://openalex.org/I34352273"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Egon Willighagen","raw_affiliation_strings":["Dept of Bioinformatics - BiGCaT, NUTRIM, Maastricht University, Netherlads"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Dept of Bioinformatics - BiGCaT, NUTRIM, Maastricht University, Netherlads","institution_ids":["https://openalex.org/I34352273"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015215097","display_name":"Fiona McNeill","orcid":"https://orcid.org/0000-0001-7873-5187"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Fiona McNeill","raw_affiliation_strings":["School of Informatics, The University of Edinburgh, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Informatics, The University of Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016133956","display_name":"Alasdair J. G. Gray","orcid":"https://orcid.org/0000-0002-5711-4872"},"institutions":[{"id":"https://openalex.org/I32062511","display_name":"Heriot-Watt University","ror":"https://ror.org/04mghma93","country_code":"GB","type":"education","lineage":["https://openalex.org/I32062511"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Alasdair J.G. Gray","raw_affiliation_strings":["School of Mathematical and Computer Science, Heriot-Watt University, Edinburgh, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mathematical and Computer Science, Heriot-Watt University, Edinburgh, UK","institution_ids":["https://openalex.org/I32062511"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5048705669"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.994,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.8160086,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9851999878883362,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8390016555786133},{"id":"https://openalex.org/keywords/sparql","display_name":"SPARQL","score":0.7686029672622681},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.6401179432868958},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.5647162795066833},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5543924570083618},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.536958634853363},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4981374740600586},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.49740079045295715},{"id":"https://openalex.org/keywords/linked-data","display_name":"Linked data","score":0.41379937529563904},{"id":"https://openalex.org/keywords/rdf","display_name":"RDF","score":0.40579017996788025},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.40158629417419434},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.20698592066764832},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.16352280974388123},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.0990678071975708}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8390016555786133},{"id":"https://openalex.org/C41009113","wikidata":"https://www.wikidata.org/wiki/Q54871","display_name":"SPARQL","level":4,"score":0.7686029672622681},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.6401179432868958},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5647162795066833},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5543924570083618},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.536958634853363},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4981374740600586},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.49740079045295715},{"id":"https://openalex.org/C69075417","wikidata":"https://www.wikidata.org/wiki/Q515701","display_name":"Linked data","level":3,"score":0.41379937529563904},{"id":"https://openalex.org/C147497476","wikidata":"https://www.wikidata.org/wiki/Q54872","display_name":"RDF","level":3,"score":0.40579017996788025},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40158629417419434},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.20698592066764832},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.16352280974388123},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.0990678071975708},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3233/sw-233491","is_oa":true,"landing_page_url":"https://doi.org/10.3233/sw-233491","pdf_url":"https://content.iospress.com:443/download/semantic-web/sw233491?id=semantic-web%2Fsw233491","source":{"id":"https://openalex.org/S4210177235","display_name":"Semantic Web","issn_l":"1570-0844","issn":["1570-0844","2210-4968"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Semantic Web","raw_type":"journal-article"},{"id":"pmh:oai:cris.maastrichtuniversity.nl:openaire/04341564-4ec4-4f97-b471-eacb7d627281","is_oa":true,"landing_page_url":"https://cris.maastrichtuniversity.nl/en/publications/04341564-4ec4-4f97-b471-eacb7d627281","pdf_url":null,"source":{"id":"https://openalex.org/S4306402616","display_name":"Research Publications (Maastricht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I34352273","host_organization_name":"Maastricht University","host_organization_lineage":["https://openalex.org/I34352273"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Hosseini beghaeiraveri, S A, Labra gayo, J E, Waagmeester, A, Ammar, A, Gonzalez, C, Slenter, D, Ul-Hasan, S, Willighagen, E, Mcneill, F, Gray, A J G, Kaffee, L-A (ed.), Razniewski, S (ed.) & Vougiouklis, P (ed.) 2024, 'Wikidata subsetting : Approaches, tools, and evaluation', Semantic web, vol. 15, no. 6, pp. 2209-2235. https://doi.org/10.3233/SW-233491","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.3233/sw-233491","is_oa":true,"landing_page_url":"https://doi.org/10.3233/sw-233491","pdf_url":"https://content.iospress.com:443/download/semantic-web/sw233491?id=semantic-web%2Fsw233491","source":{"id":"https://openalex.org/S4210177235","display_name":"Semantic Web","issn_l":"1570-0844","issn":["1570-0844","2210-4968"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Semantic Web","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8531315819","display_name":null,"funder_award_id":"G-2021-17106","funder_id":"https://openalex.org/F4320306151","funder_display_name":"Alfred P. Sloan Foundation"}],"funders":[{"id":"https://openalex.org/F4320306151","display_name":"Alfred P. Sloan Foundation","ror":"https://ror.org/052csg198"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390412988.pdf"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W1893177189","https://openalex.org/W2013667086","https://openalex.org/W2080133951","https://openalex.org/W2302501749","https://openalex.org/W2750941408","https://openalex.org/W2952216057","https://openalex.org/W3011511325","https://openalex.org/W3095701222","https://openalex.org/W3100284210","https://openalex.org/W3109507892","https://openalex.org/W3155012498","https://openalex.org/W3203673500","https://openalex.org/W4205317928","https://openalex.org/W4285209092","https://openalex.org/W4309610680","https://openalex.org/W4393506407","https://openalex.org/W4393738610"],"related_works":["https://openalex.org/W199330785","https://openalex.org/W2615202182","https://openalex.org/W98016204","https://openalex.org/W2904139343","https://openalex.org/W2767591199","https://openalex.org/W2101525042","https://openalex.org/W2563388676","https://openalex.org/W4388184885","https://openalex.org/W4322622679","https://openalex.org/W2476635466"],"abstract_inverted_index":{"Wikidata":[0,39,60,84,156,249],"is":[1,40,95,204,211],"a":[2,20,62,78,87,185],"massive":[3],"Knowledge":[4],"Graph":[5],"(KG),":[6],"including":[7],"more":[8],"than":[9],"100":[10,55],"million":[11],"data":[12,82,104],"items":[13,193],"and":[14,31,100,120,147,149,162,172,194,223,236],"nearly":[15],"1.5":[16],"billion":[17],"statements,":[18],"covering":[19],"wide":[21],"range":[22,80,105],"of":[23,38,53,57,81,98,167,187,238],"topics":[24],"such":[25],"as":[26],"geography,":[27],"history,":[28],"scholarly":[29],"articles,":[30],"life":[32],"science":[33],"data.":[34,58],"The":[35,199],"large":[36],"volume":[37],"difficult":[41],"to":[42],"handle":[43],"for":[44,71,90,126,155],"research":[45],"purposes;":[46],"many":[47],"researchers":[48,75],"cannot":[49],"afford":[50],"the":[51,96,102,107,139,176,207,214,224,234,247],"costs":[52],"hosting":[54],"GB":[56],"While":[59],"provides":[61],"public":[63,248],"SPARQL":[64,250],"endpoint,":[65],"it":[66],"can":[67],"only":[68,76],"be":[69,244],"used":[70],"short-running":[72],"queries.":[73],"Often,":[74],"require":[77],"limited":[79],"from":[83,106],"focusing":[85],"on":[86],"particular":[88],"topic":[89],"their":[91,144],"use":[92,218],"case.":[93],"Subsetting":[94],"process":[97,110],"defining":[99,175],"extracting":[101,191,197],"required":[103],"KG;":[108],"this":[109,135],"has":[111],"received":[112],"increasing":[113],"attention":[114],"in":[115,165,174,190,196,202],"recent":[116],"years.":[117],"Specific":[118],"tools":[119,153,183],"several":[121],"approaches":[122],"have":[123,129,184,220,227],"been":[124,131,221,228],"developed":[125],"subsetting,":[127],"which":[128,240],"not":[130,243],"evaluated":[132],"yet.":[133],"In":[134],"paper,":[136],"we":[137],"survey":[138],"available":[140],"subsetting":[141,157],"approaches,":[142],"introducing":[143],"general":[145],"strengths":[146],"weaknesses,":[148],"evaluate":[150],"four":[151,182],"practical":[152],"specific":[154],"\u2013":[158,164],"WDSub,":[159],"KGTK,":[160],"WDumper,":[161],"WDF":[163],"terms":[166],"execution":[168],"performance,":[169],"extraction":[170,203],"accuracy,":[171],"flexibility":[173],"subsets.":[177],"Results":[178],"show":[179],"that":[180],"all":[181],"minimum":[186],"99.96%":[188],"accuracy":[189],"defined":[192,222],"99.25%":[195],"statements.":[198],"fastest":[200],"tool":[201,210],"WDF,":[205],"while":[206],"most":[208],"flexible":[209],"WDSub.":[212],"During":[213],"experiments,":[215],"multiple":[216],"subset":[217],"cases":[219],"extracted":[225],"subsets":[226],"analyzed,":[229],"obtaining":[230],"valuable":[231],"information":[232],"about":[233],"variety":[235],"quality":[237],"Wikidata,":[239],"would":[241],"otherwise":[242],"possible":[245],"through":[246],"endpoint.":[251]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
