{"id":"https://openalex.org/W4416785089","doi":"https://doi.org/10.1186/s40537-025-01312-5","title":"Enhancing data discovery with contextual pre-filtering","display_name":"Enhancing data discovery with contextual pre-filtering","publication_year":2025,"publication_date":"2025-11-28","ids":{"openalex":"https://openalex.org/W4416785089","doi":"https://doi.org/10.1186/s40537-025-01312-5"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-025-01312-5","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-025-01312-5","pdf_url":"https://link.springer.com/content/pdf/10.1186/s40537-025-01312-5.pdf","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1186/s40537-025-01312-5.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102925278","display_name":"Javier Flores","orcid":"https://orcid.org/0000-0002-2998-9962"},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Javier Flores","raw_affiliation_strings":["Database Technologies and Information Management Group, Universitat Polit\u00e8cnica de Catalunya, Jordi Girona 31, 08034, Barcelona, Catalunya, Spain"],"affiliations":[{"raw_affiliation_string":"Database Technologies and Information Management Group, Universitat Polit\u00e8cnica de Catalunya, Jordi Girona 31, 08034, Barcelona, Catalunya, Spain","institution_ids":["https://openalex.org/I9617848"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013379001","display_name":"Sergi Nadal","orcid":"https://orcid.org/0000-0002-8565-952X"},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Sergi Nadal","raw_affiliation_strings":["Database Technologies and Information Management Group, Universitat Polit\u00e8cnica de Catalunya, Jordi Girona 31, 08034, Barcelona, Catalunya, Spain"],"affiliations":[{"raw_affiliation_string":"Database Technologies and Information Management Group, Universitat Polit\u00e8cnica de Catalunya, Jordi Girona 31, 08034, Barcelona, Catalunya, Spain","institution_ids":["https://openalex.org/I9617848"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067693515","display_name":"Oscar Romero","orcid":"https://orcid.org/0000-0001-6350-8328"},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Oscar Romero","raw_affiliation_strings":["Database Technologies and Information Management Group, Universitat Polit\u00e8cnica de Catalunya, Jordi Girona 31, 08034, Barcelona, Catalunya, Spain"],"affiliations":[{"raw_affiliation_string":"Database Technologies and Information Management Group, Universitat Polit\u00e8cnica de Catalunya, Jordi Girona 31, 08034, Barcelona, Catalunya, Spain","institution_ids":["https://openalex.org/I9617848"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102925278"],"corresponding_institution_ids":["https://openalex.org/I9617848"],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.4164566,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"12","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.660099983215332,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.660099983215332,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.25040000677108765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.023600000888109207,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.8069999814033508},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.593999981880188},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5194000005722046},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.48579999804496765},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4659999907016754},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.44350001215934753},{"id":"https://openalex.org/keywords/computational-science-and-engineering","display_name":"Computational Science and Engineering","score":0.4271000027656555},{"id":"https://openalex.org/keywords/data-discovery","display_name":"Data discovery","score":0.4124999940395355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8682000041007996},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.8069999814033508},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.593999981880188},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5382000207901001},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5194000005722046},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.48579999804496765},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4659999907016754},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.44350001215934753},{"id":"https://openalex.org/C68597687","wikidata":"https://www.wikidata.org/wiki/Q362601","display_name":"Computational Science and Engineering","level":2,"score":0.4271000027656555},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4239000082015991},{"id":"https://openalex.org/C2777516300","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data discovery","level":3,"score":0.4124999940395355},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.38600000739097595},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.382999986410141},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.36890000104904175},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3508000075817108},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3504999876022339},{"id":"https://openalex.org/C2984917352","wikidata":"https://www.wikidata.org/wiki/Q12772819","display_name":"Scientific discovery","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3125999867916107},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.3125},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.3061999976634979},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.30070000886917114},{"id":"https://openalex.org/C112789634","wikidata":"https://www.wikidata.org/wiki/Q18207010","display_name":"False positives and false negatives","level":3,"score":0.28600001335144043},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C105445830","wikidata":"https://www.wikidata.org/wiki/Q6322855","display_name":"K-optimal pattern discovery","level":3,"score":0.2667999863624573},{"id":"https://openalex.org/C40077939","wikidata":"https://www.wikidata.org/wiki/Q2336004","display_name":"Semi-structured data","level":3,"score":0.2621999979019165},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2531000077724457}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1186/s40537-025-01312-5","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-025-01312-5","pdf_url":"https://link.springer.com/content/pdf/10.1186/s40537-025-01312-5.pdf","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},{"id":"pmh:oai:upcommons.upc.edu:2117/449055","is_oa":true,"landing_page_url":"https://hdl.handle.net/2117/449055","pdf_url":"https://upcommons.upc.edu/bitstreams/c4d8a0f6-d77e-4b15-b376-372e6e2709c8/download","source":{"id":"https://openalex.org/S4210207057","display_name":"QRU Quaderns de Recerca en Urbanisme","issn_l":"2014-9689","issn":["2014-9689","2385-6777"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310322448","host_organization_name":"Q71272178","host_organization_lineage":["https://openalex.org/P4310322448"],"host_organization_lineage_names":["Q71272178"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"pmh:oai:doaj.org/article:f7bfe1d8158d4c68b69dd00ac0d4f3ce","is_oa":true,"landing_page_url":"https://doaj.org/article/f7bfe1d8158d4c68b69dd00ac0d4f3ce","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Big Data, Vol 12, Iss 1, Pp 1-36 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s40537-025-01312-5","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-025-01312-5","pdf_url":"https://link.springer.com/content/pdf/10.1186/s40537-025-01312-5.pdf","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2262748287","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3429648993","display_name":null,"funder_award_id":"PID202","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3480869486","display_name":null,"funder_award_id":"13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G4126322094","display_name":null,"funder_award_id":"01100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G5421974903","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320322930","funder_display_name":"Ministerio de Ciencia e Innovaci\u00f3n"},{"id":"https://openalex.org/G6292477444","display_name":null,"funder_award_id":"PID2020-117191RB-I00 / AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320322930","funder_display_name":"Ministerio de Ciencia e Innovaci\u00f3n"},{"id":"https://openalex.org/G661330594","display_name":null,"funder_award_id":"00110","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G6685425346","display_name":null,"funder_award_id":"0011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7266728691","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G8055432103","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320322930","funder_display_name":"Ministerio de Ciencia e Innovaci\u00f3n"},{"id":"https://openalex.org/G8260616629","display_name":null,"funder_award_id":"011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"}],"funders":[{"id":"https://openalex.org/F4320322930","display_name":"Ministerio de Ciencia e Innovaci\u00f3n","ror":"https://ror.org/034900433"},{"id":"https://openalex.org/F4320334322","display_name":"HORIZON EUROPE Framework Programme","ror":null},{"id":"https://openalex.org/F4320335598","display_name":"Agencia Estatal de Investigaci\u00f3n","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416785089.pdf","grobid_xml":"https://content.openalex.org/works/W4416785089.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W1969621019","https://openalex.org/W1987562803","https://openalex.org/W1996505782","https://openalex.org/W2008896880","https://openalex.org/W2065259291","https://openalex.org/W2092364718","https://openalex.org/W2166400748","https://openalex.org/W2241750177","https://openalex.org/W2286724461","https://openalex.org/W2396588571","https://openalex.org/W2398606196","https://openalex.org/W2788608347","https://openalex.org/W2798664493","https://openalex.org/W2898796029","https://openalex.org/W2907492528","https://openalex.org/W2935512716","https://openalex.org/W2942727666","https://openalex.org/W2948163032","https://openalex.org/W2951621897","https://openalex.org/W2963174348","https://openalex.org/W2963626623","https://openalex.org/W2970059736","https://openalex.org/W2970992672","https://openalex.org/W2971535351","https://openalex.org/W2971681342","https://openalex.org/W3014616325","https://openalex.org/W3037852608","https://openalex.org/W3080456792","https://openalex.org/W3082424964","https://openalex.org/W3092962901","https://openalex.org/W3119752913","https://openalex.org/W3135488505","https://openalex.org/W3174181645","https://openalex.org/W3174637548","https://openalex.org/W3196904276","https://openalex.org/W4281811562","https://openalex.org/W4283383705","https://openalex.org/W4289533982","https://openalex.org/W4309563570","https://openalex.org/W4311927641","https://openalex.org/W4367032190","https://openalex.org/W4375928372","https://openalex.org/W4380433117","https://openalex.org/W4385653220","https://openalex.org/W4386026606","https://openalex.org/W4386457044","https://openalex.org/W4387332921","https://openalex.org/W4388626299","https://openalex.org/W4392455862","https://openalex.org/W4393066124","https://openalex.org/W4400909768"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"In":[1],"this":[2],"paper,":[3],"entity":[4],"contextual":[5],"pre-filtering":[6,35],"is":[7,37],"proposed":[8,34,60],"to":[9,24,51,87],"refine":[10],"dataset":[11],"relevance":[12],"assessment":[13],"and":[14,39,70,92],"streamline":[15],"data":[16,56,63,73,89],"discovery.":[17],"Heterogeneous":[18],"Graph":[19],"Neural":[20],"Networks":[21],"are":[22],"used":[23],"exploit":[25],"the":[26],"local":[27],"context":[28],"embedded":[29],"within":[30],"graph-based":[31],"schemas.":[32],"The":[33,59],"approach":[36],"versatile":[38],"does":[40],"not":[41],"rely":[42],"on":[43],"any":[44],"specific":[45],"similarity":[46],"metric,":[47],"making":[48],"it":[49],"applicable":[50],"a":[52,82],"wide":[53],"range":[54],"of":[55,84],"discovery":[57,64,90],"methods.":[58],"technique":[61],"increases":[62],"precision":[65],"by":[66],"reducing":[67],"false":[68],"positives":[69],"identifying":[71],"significant":[72],"relationships.":[74],"This":[75],"method":[76],"has":[77],"been":[78],"empirically":[79],"validated":[80],"across":[81],"variety":[83],"real-world":[85],"datasets":[86],"improve":[88],"efficiency":[91],"accuracy.":[93]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-11-28T00:00:00"}
