{"id":"https://openalex.org/W4380629626","doi":"https://doi.org/10.1162/dint_a_00216","title":"Predicting an Optimal Virtual Data Model for Uniform Access to Large Heterogeneous Data","display_name":"Predicting an Optimal Virtual Data Model for Uniform Access to Large Heterogeneous Data","publication_year":2023,"publication_date":"2023-06-13","ids":{"openalex":"https://openalex.org/W4380629626","doi":"https://doi.org/10.1162/dint_a_00216"},"language":"en","primary_location":{"id":"doi:10.1162/dint_a_00216","is_oa":true,"landing_page_url":"https://doi.org/10.1162/dint_a_00216","pdf_url":"https://direct.mit.edu/dint/article-pdf/doi/10.1162/dint_a_00216/2127029/dint_a_00216.pdf","source":{"id":"https://openalex.org/S4210186383","display_name":"Data Intelligence","issn_l":"2096-7004","issn":["2096-7004","2641-435X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/dint/article-pdf/doi/10.1162/dint_a_00216/2127029/dint_a_00216.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024346441","display_name":"Chahrazed B. Bachir Belmehdi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chahrazed B. Bachir Belmehdi","raw_affiliation_strings":["LabRI-SBA, Enterprise Information Systems, ESI-SBA Institute; Fraunhofer IAIS, Algeria; Germany"],"affiliations":[{"raw_affiliation_string":"LabRI-SBA, Enterprise Information Systems, ESI-SBA Institute; Fraunhofer IAIS, Algeria; Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069001751","display_name":"Abderrahmane Khiat","orcid":"https://orcid.org/0000-0002-7708-7971"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abderrahmane Khiat","raw_affiliation_strings":["LabRI-SBA, Enterprise Information Systems, ESI-SBA Institute; Fraunhofer IAIS, Algeria; Germany"],"affiliations":[{"raw_affiliation_string":"LabRI-SBA, Enterprise Information Systems, ESI-SBA Institute; Fraunhofer IAIS, Algeria; Germany","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074529351","display_name":"Nabil Keskes","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nabil Keskes","raw_affiliation_strings":["LabRI-SBA, Enterprise Information Systems, ESI-SBA Institute; Fraunhofer IAIS, Algeria; Germany"],"affiliations":[{"raw_affiliation_string":"LabRI-SBA, Enterprise Information Systems, ESI-SBA Institute; Fraunhofer IAIS, Algeria; Germany","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5024346441"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05512717,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"6","issue":"2","first_page":"504","last_page":"530"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8657777309417725},{"id":"https://openalex.org/keywords/sparql","display_name":"SPARQL","score":0.6784969568252563},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.5353029370307922},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.4920775294303894},{"id":"https://openalex.org/keywords/data-warehouse","display_name":"Data warehouse","score":0.4765501618385315},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.45252907276153564},{"id":"https://openalex.org/keywords/data-virtualization","display_name":"Data virtualization","score":0.44799157977104187},{"id":"https://openalex.org/keywords/query-plan","display_name":"Query plan","score":0.4368312954902649},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.37915095686912537},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3333972692489624},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.2827644944190979},{"id":"https://openalex.org/keywords/rdf","display_name":"RDF","score":0.28102588653564453},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.2768184542655945},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.24453791975975037},{"id":"https://openalex.org/keywords/virtualization","display_name":"Virtualization","score":0.22766229510307312},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.1607854962348938},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.12980526685714722},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.11880776286125183},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09800440073013306},{"id":"https://openalex.org/keywords/sargable","display_name":"Sargable","score":0.09452390670776367}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8657777309417725},{"id":"https://openalex.org/C41009113","wikidata":"https://www.wikidata.org/wiki/Q54871","display_name":"SPARQL","level":4,"score":0.6784969568252563},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.5353029370307922},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.4920775294303894},{"id":"https://openalex.org/C135572916","wikidata":"https://www.wikidata.org/wiki/Q193351","display_name":"Data warehouse","level":2,"score":0.4765501618385315},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.45252907276153564},{"id":"https://openalex.org/C80344994","wikidata":"https://www.wikidata.org/wiki/Q5227369","display_name":"Data virtualization","level":4,"score":0.44799157977104187},{"id":"https://openalex.org/C2779729312","wikidata":"https://www.wikidata.org/wiki/Q784232","display_name":"Query plan","level":5,"score":0.4368312954902649},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37915095686912537},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3333972692489624},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2827644944190979},{"id":"https://openalex.org/C147497476","wikidata":"https://www.wikidata.org/wiki/Q54872","display_name":"RDF","level":3,"score":0.28102588653564453},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.2768184542655945},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.24453791975975037},{"id":"https://openalex.org/C513985346","wikidata":"https://www.wikidata.org/wiki/Q270471","display_name":"Virtualization","level":3,"score":0.22766229510307312},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.1607854962348938},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.12980526685714722},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.11880776286125183},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09800440073013306},{"id":"https://openalex.org/C192939062","wikidata":"https://www.wikidata.org/wiki/Q104840822","display_name":"Sargable","level":4,"score":0.09452390670776367},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1162/dint_a_00216","is_oa":true,"landing_page_url":"https://doi.org/10.1162/dint_a_00216","pdf_url":"https://direct.mit.edu/dint/article-pdf/doi/10.1162/dint_a_00216/2127029/dint_a_00216.pdf","source":{"id":"https://openalex.org/S4210186383","display_name":"Data Intelligence","issn_l":"2096-7004","issn":["2096-7004","2641-435X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:8b456dcda066495591a9e711b89cda51","is_oa":true,"landing_page_url":"https://doaj.org/article/8b456dcda066495591a9e711b89cda51","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data Intelligence, Vol 6, Iss 2 (2024)","raw_type":"article"},{"id":"pmh:oai:publica.fraunhofer.de:publica/509719","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/509719","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"other"}],"best_oa_location":{"id":"doi:10.1162/dint_a_00216","is_oa":true,"landing_page_url":"https://doi.org/10.1162/dint_a_00216","pdf_url":"https://direct.mit.edu/dint/article-pdf/doi/10.1162/dint_a_00216/2127029/dint_a_00216.pdf","source":{"id":"https://openalex.org/S4210186383","display_name":"Data Intelligence","issn_l":"2096-7004","issn":["2096-7004","2641-435X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315718","host_organization_name":"The MIT Press","host_organization_lineage":["https://openalex.org/P4310315718"],"host_organization_lineage_names":["The MIT Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6299999952316284,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4380629626.pdf"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W1513761307","https://openalex.org/W1538208124","https://openalex.org/W1582987337","https://openalex.org/W1899232723","https://openalex.org/W2004817090","https://openalex.org/W2061253631","https://openalex.org/W2064675550","https://openalex.org/W2074148633","https://openalex.org/W2148417939","https://openalex.org/W2248883357","https://openalex.org/W2261525379","https://openalex.org/W2299775049","https://openalex.org/W2493064216","https://openalex.org/W2530314904","https://openalex.org/W2530589556","https://openalex.org/W2600790865","https://openalex.org/W2766220942","https://openalex.org/W2793317603","https://openalex.org/W2808986925","https://openalex.org/W2897938592","https://openalex.org/W2967736654","https://openalex.org/W2981075922","https://openalex.org/W2998249308","https://openalex.org/W3037821335","https://openalex.org/W3131431768","https://openalex.org/W3169157959","https://openalex.org/W4239912293","https://openalex.org/W6628546715","https://openalex.org/W6630621286","https://openalex.org/W6631577826","https://openalex.org/W6632190348","https://openalex.org/W6673559599","https://openalex.org/W6681530799","https://openalex.org/W6691580876","https://openalex.org/W6698183232","https://openalex.org/W6752497734","https://openalex.org/W6754075298","https://openalex.org/W6773922571","https://openalex.org/W6776041151","https://openalex.org/W6802752559","https://openalex.org/W7008772984"],"related_works":["https://openalex.org/W2043890830","https://openalex.org/W2787789727","https://openalex.org/W2923772254","https://openalex.org/W2810193297","https://openalex.org/W4320025244","https://openalex.org/W2917146715","https://openalex.org/W3086868968","https://openalex.org/W2810241552","https://openalex.org/W2621591477","https://openalex.org/W4380629626"],"abstract_inverted_index":{"ABSTRACT":[0],"The":[1],"growth":[2],"of":[3,99,133,156,192,252,261],"generated":[4],"data":[5,13,18,36,41,46,79,123,125,138,164,181,208,217],"in":[6,165,229,257],"the":[7,39,59,131,135,154,178,245,265,273],"industry":[8],"requires":[9],"new":[10,148],"efficient":[11],"big":[12],"integration":[14],"approaches":[15,49],"for":[16,95,140,264,272],"uniform":[17,66],"access":[19],"by":[20,50],"end-users":[21],"to":[22,70,111,158,176],"perform":[23],"better":[24],"business":[25],"operations.":[26,128],"Data":[27,32,62,199],"virtualization":[28],"systems,":[29],"including":[30],"Ontology-Based":[31],"Access":[33],"(ODBA)":[34],"query":[35,121,159,258],"on-the-fly":[37,69],"against":[38],"original":[40],"sources":[42,218],"without":[43],"any":[44],"prior":[45],"materialization.":[47],"Existing":[48],"design":[51],"use":[52],"a":[53,65,147,166,172,255],"fixed":[54],"model":[55,139,182,248,267,275],"e.g.,":[56,223],"TABULAR":[57],"as":[58,82,102,120],"only":[60],"Virtual":[61],"Model":[63],"\u2014":[64,190],"schema":[67],"built":[68],"load,":[71],"transform,":[72],"and":[73,127,160,169,202,204,211,213,226,234,269],"join":[74,103,161],"relevant":[75],"data.":[76],"While":[77],"other":[78],"models,":[80,209],"such":[81,101,119],"GRAPH":[83,210],"or":[84,104],"DOCUMENT,":[85],"are":[86,109],"more":[87,93],"flexible":[88],"and,":[89],"thus,":[90,254],"can":[91],"be":[92],"suitable":[94],"some":[96],"common":[97],"types":[98],"queries,":[100],"nested":[105],"queries.":[106,188],"Those":[107],"queries":[108,141],"hard":[110],"predict":[112,177],"because":[113],"they":[114],"depend":[115],"on":[116,142,153],"many":[117],"criteria,":[118],"plan,":[122],"model,":[124],"size,":[126],"To":[129],"address":[130],"problem":[132],"selecting":[134],"optimal":[136,179,246],"virtual":[137,180,207,247],"large":[143,162],"datasets,":[144],"we":[145],"present":[146],"approach":[149,194,242],"that":[150,240],"(1)":[151],"builds":[152],"principal":[155],"OBDA":[157],"heterogeneous":[163],"distributed":[167],"manner":[168],"(2)":[170],"calls":[171],"deep":[173],"learning":[174],"method":[175],"using":[183],"features":[184],"extracted":[185],"from":[186],"SPARQL":[187],"OPTIMA":[189],"implementation":[191],"our":[193,241],"currently":[195],"leverages":[196],"state-of-the-art":[197],"Big":[198],"technologies,":[200],"Apache-Spark":[201],"Graphx,":[203],"implements":[205],"two":[206],"TABULAR,":[212],"supports":[214],"out-of-the-box":[215],"five":[216],"models:":[219],"property":[220],"graph,":[221],"document-based,":[222],"wide-columnar,":[224],"relational,":[225],"tabular,":[227],"stored":[228],"Neo4j,":[230],"MongoDB,":[231],"Cassandra,":[232],"MySQL,":[233],"CSV":[235],"respectively.":[236],"Extensive":[237],"experiments":[238],"show":[239],"is":[243],"returning":[244],"with":[249],"an":[250],"accuracy":[251],"0.831,":[253],"reduction":[256],"execution":[259],"time":[260],"over":[262,270],"40%":[263],"tabular":[266],"selection":[268],"30%":[271],"graph":[274],"selection.":[276]},"counts_by_year":[],"updated_date":"2026-03-17T09:09:15.849793","created_date":"2025-10-10T00:00:00"}
