{"id":"https://openalex.org/W2783928333","doi":"https://doi.org/10.1109/bigdata.2017.8258261","title":"Schema design support for semi-structured data: Finding the sweet spot between NF and De-NF","display_name":"Schema design support for semi-structured data: Finding the sweet spot between NF and De-NF","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2783928333","doi":"https://doi.org/10.1109/bigdata.2017.8258261","mag":"2783928333"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2017.8258261","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8258261","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022151010","display_name":"Vincent Reniers","orcid":"https://orcid.org/0000-0003-3895-702X"},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]},{"id":"https://openalex.org/I4210114974","display_name":"IMEC","ror":"https://ror.org/02kcbn207","country_code":"BE","type":"nonprofit","lineage":["https://openalex.org/I4210114974"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Vincent Reniers","raw_affiliation_strings":["Imec-DistriNet-KU Leuven, Department of Computer Science, Heverlee, Belgium"],"affiliations":[{"raw_affiliation_string":"Imec-DistriNet-KU Leuven, Department of Computer Science, Heverlee, Belgium","institution_ids":["https://openalex.org/I4210114974","https://openalex.org/I99464096"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012610674","display_name":"Dimitri Van Landuyt","orcid":"https://orcid.org/0000-0001-6597-2271"},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]},{"id":"https://openalex.org/I4210114974","display_name":"IMEC","ror":"https://ror.org/02kcbn207","country_code":"BE","type":"nonprofit","lineage":["https://openalex.org/I4210114974"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Dimitri Van Landuyt","raw_affiliation_strings":["Imec-DistriNet-KU Leuven, Department of Computer Science, Heverlee, Belgium"],"affiliations":[{"raw_affiliation_string":"Imec-DistriNet-KU Leuven, Department of Computer Science, Heverlee, Belgium","institution_ids":["https://openalex.org/I4210114974","https://openalex.org/I99464096"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078608981","display_name":"Ansar Rafique","orcid":"https://orcid.org/0000-0002-5881-7588"},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]},{"id":"https://openalex.org/I4210114974","display_name":"IMEC","ror":"https://ror.org/02kcbn207","country_code":"BE","type":"nonprofit","lineage":["https://openalex.org/I4210114974"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Ansar Rafique","raw_affiliation_strings":["Imec-DistriNet-KU Leuven, Department of Computer Science, Heverlee, Belgium"],"affiliations":[{"raw_affiliation_string":"Imec-DistriNet-KU Leuven, Department of Computer Science, Heverlee, Belgium","institution_ids":["https://openalex.org/I4210114974","https://openalex.org/I99464096"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054031138","display_name":"Wouter Joosen","orcid":"https://orcid.org/0000-0002-7710-5092"},"institutions":[{"id":"https://openalex.org/I99464096","display_name":"KU Leuven","ror":"https://ror.org/05f950310","country_code":"BE","type":"education","lineage":["https://openalex.org/I99464096"]},{"id":"https://openalex.org/I4210114974","display_name":"IMEC","ror":"https://ror.org/02kcbn207","country_code":"BE","type":"nonprofit","lineage":["https://openalex.org/I4210114974"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Wouter Joosen","raw_affiliation_strings":["Imec-DistriNet-KU Leuven, Department of Computer Science, Heverlee, Belgium"],"affiliations":[{"raw_affiliation_string":"Imec-DistriNet-KU Leuven, Department of Computer Science, Heverlee, Belgium","institution_ids":["https://openalex.org/I4210114974","https://openalex.org/I99464096"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5022151010"],"corresponding_institution_ids":["https://openalex.org/I4210114974","https://openalex.org/I99464096"],"apc_list":null,"apc_paid":null,"fwci":0.8288,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.78141786,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"2921","last_page":"2930"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8361438512802124},{"id":"https://openalex.org/keywords/nosql","display_name":"NoSQL","score":0.7328693270683289},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6308112740516663},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.4923974275588989},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.46348243951797485},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.45014622807502747},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4266948401927948},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.42193418741226196}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8361438512802124},{"id":"https://openalex.org/C2779599972","wikidata":"https://www.wikidata.org/wiki/Q82231","display_name":"NoSQL","level":3,"score":0.7328693270683289},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6308112740516663},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.4923974275588989},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.46348243951797485},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.45014622807502747},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4266948401927948},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.42193418741226196}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata.2017.8258261","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8258261","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:lirias2repo.kuleuven.be:123456789/599630","is_oa":false,"landing_page_url":"https://lirias.kuleuven.be/bitstream/123456789/599630/1/bpod-2017-final.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306401954","display_name":"Lirias (KU Leuven)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I99464096","host_organization_name":"KU Leuven","host_organization_lineage":["https://openalex.org/I99464096"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE International Conference on Big Data, Boston, USA, 11-14 December 2017","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W120301130","https://openalex.org/W1174071369","https://openalex.org/W1485347932","https://openalex.org/W1505401439","https://openalex.org/W1556530724","https://openalex.org/W1975837734","https://openalex.org/W1985229168","https://openalex.org/W2002258219","https://openalex.org/W2043730929","https://openalex.org/W2048402958","https://openalex.org/W2120113879","https://openalex.org/W2167357284","https://openalex.org/W2240667924","https://openalex.org/W2288042494","https://openalex.org/W2316078626","https://openalex.org/W2438877200","https://openalex.org/W2510665119","https://openalex.org/W2516401821","https://openalex.org/W2520645465","https://openalex.org/W2527777278","https://openalex.org/W2531138495","https://openalex.org/W2537849499","https://openalex.org/W2569617198","https://openalex.org/W2730037187","https://openalex.org/W3138135046","https://openalex.org/W6604797525","https://openalex.org/W6627614193","https://openalex.org/W6633547644","https://openalex.org/W6634624844"],"related_works":["https://openalex.org/W1521050857","https://openalex.org/W2005967224","https://openalex.org/W2774614504","https://openalex.org/W2126420512","https://openalex.org/W2108941518","https://openalex.org/W3157676313","https://openalex.org/W1841120357","https://openalex.org/W2884115854","https://openalex.org/W2884906776","https://openalex.org/W4313444821"],"abstract_inverted_index":{"Contemporary":[0],"storage":[1,168],"systems":[2],"increasingly":[3],"offer":[4],"schema":[5,198,252],"flexibility":[6],"and":[7,44,58,101,126,129,146,169,184,210,219,248,255,262,274],"support":[8,227,254],"for":[9,17,42,138],"semi-structured":[10],"data":[11,26,37,76,123,136,158,180,197,220,224,272,279],"models.":[12],"This":[13],"is":[14,77,150],"the":[15,40,111,133,147,153,177,179,196,205,208],"case":[16],"document-oriented":[18],"databases,":[19],"which":[20,47],"as":[21,92,102,204],"such":[22,103,115,203],"allow":[23],"ingestion":[24],"of":[25,36,70,121,155,165,207,270,283],"from":[27],"heterogeneous":[28],"sources":[29],"(IoT,":[30],"sensors,":[31],"monitoring).":[32],"The":[33,66],"increased":[34],"influx":[35],"further":[38],"emphasizes":[39],"necessity":[41],"horizontal":[43,213],"elastic":[45],"scalability,":[46,167],"are":[48,73,161,229,233],"attained":[49,74],"in":[50,79,163,200,230],"NoSQL":[51],"document":[52,71,157,201,209],"stores":[53,72],"through":[54,63],"simplifying":[55],"query":[56,99,182],"functionality":[57],"relaxing":[59],"transactional":[60],"properties,":[61],"e.g.":[62],"eventual":[64],"consistency.":[65],"most":[67,134],"compelling":[68],"benefits":[69],"when":[75],"stored":[78],"a":[80,119,267],"denormalized":[81],"form":[82,113],"(De-NF).":[83],"For":[84],"example,":[85],"one":[86],"can":[87],"decide":[88],"to":[89,96,110,265],"store":[90],"relationships":[91],"an":[93,139],"embedded":[94],"copy":[95],"increase":[97],"read":[98],"performance":[100],"avoid":[104],"costly":[105],"cross-node":[106],"consultations.":[107],"In":[108,187],"comparison":[109],"normalized":[112],"(NF),":[114],"designs":[116],"come":[117],"at":[118],"cost":[120,284],"additional":[122],"duplication,":[124],"consistency":[125],"decreased":[127],"write-":[128],"update":[130],"performance.":[131],"Determining":[132],"appropriate":[135],"model":[137,225],"application":[140,148],"however":[141],"depends":[142],"on":[143,176],"many":[144],"factors,":[145],"developer":[149],"faced":[151],"with":[152],"complexity":[154],"designing":[156],"models":[159,273,280],"that":[160,194,235,257,286],"optimized":[162],"terms":[164],"performance,":[166],"memory":[170],"size,":[171],"all":[172,240],"requiring":[173],"in-depth":[174],"knowledge":[175],"technology,":[178],"meta-model,":[181],"plans":[183],"expected":[185],"workloads.":[186],"this":[188],"paper,":[189],"we":[190,244],"first":[191],"discuss":[192],"factors":[193],"impact":[195],"design":[199,226,253],"stores,":[202],"nature":[206],"its":[211],"attributes,":[212],"partitioning,":[214],"index":[215],"selection,":[216],"workload":[217],"variability,":[218],"uniformity.":[221],"Although":[222],"some":[223],"tools":[228],"existence,":[231],"there":[232],"none":[234],"systematically":[236],"take":[237],"into":[238],"account":[239],"these":[241,277],"factors.":[242],"Then,":[243],"outline":[245],"our":[246],"vision":[247],"roadmap":[249],"towards":[250],"systematic":[251],"tooling":[256],"involves":[258],"(i)":[259],"leveraging":[260],"heuristics":[261],"common":[263],"tactics":[264],"generate":[266],"finite":[268],"number":[269],"candidate":[271,278],"(ii)":[275],"ranking":[276],"by":[281],"means":[282],"functions":[285],"express":[287],"their":[288],"cost-effectiveness.":[289]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
