{"id":"https://openalex.org/W7135044171","doi":"https://doi.org/10.48550/arxiv.2603.10809","title":"Beyond Standard Datacubes: Extracting Features from Irregular and Branching Earth System Data","display_name":"Beyond Standard Datacubes: Extracting Features from Irregular and Branching Earth System Data","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135044171","doi":"https://doi.org/10.48550/arxiv.2603.10809"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.10809","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10809","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.10809","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044331485","display_name":"Mathilde Leuridan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Leuridan, Mathilde","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128840598","display_name":"James Hawkes","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hawkes, James","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034071738","display_name":"Tiago Quintino","orcid":"https://orcid.org/0000-0003-0602-0531"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Quintino, Tiago","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128846404","display_name":"Martin Schultz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schultz, Martin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5044331485"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.3172000050544739,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.3172000050544739,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.09179999679327011,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.0812000036239624,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hypercube","display_name":"Hypercube","score":0.5587999820709229},{"id":"https://openalex.org/keywords/data-cube","display_name":"Data cube","score":0.5396999716758728},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5080999732017517},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.47589999437332153},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.4733000099658966},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.46889999508857727},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4528000056743622},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.4442000091075897},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4275999963283539},{"id":"https://openalex.org/keywords/online-analytical-processing","display_name":"Online analytical processing","score":0.382999986410141}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7583000063896179},{"id":"https://openalex.org/C50820777","wikidata":"https://www.wikidata.org/wiki/Q213723","display_name":"Hypercube","level":2,"score":0.5587999820709229},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5493999719619751},{"id":"https://openalex.org/C78168278","wikidata":"https://www.wikidata.org/wiki/Q5227269","display_name":"Data cube","level":2,"score":0.5396999716758728},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5080999732017517},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.47859999537467957},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.47589999437332153},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.4733000099658966},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.46889999508857727},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4528000056743622},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.4442000091075897},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4275999963283539},{"id":"https://openalex.org/C201932085","wikidata":"https://www.wikidata.org/wiki/Q642514","display_name":"Online analytical processing","level":3,"score":0.382999986410141},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.3700000047683716},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.367000013589859},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.36309999227523804},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.33980000019073486},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.33059999346733093},{"id":"https://openalex.org/C163797641","wikidata":"https://www.wikidata.org/wiki/Q2067937","display_name":"Tree structure","level":3,"score":0.328900009393692},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.3179999887943268},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2985000014305115},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.28850001096725464},{"id":"https://openalex.org/C100463513","wikidata":"https://www.wikidata.org/wiki/Q5227322","display_name":"Data model (GIS)","level":2,"score":0.28850001096725464},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C13670688","wikidata":"https://www.wikidata.org/wiki/Q3500548","display_name":"Space partitioning","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C206175624","wikidata":"https://www.wikidata.org/wiki/Q595731","display_name":"Branching (polymer chemistry)","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.27090001106262207},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C48105269","wikidata":"https://www.wikidata.org/wiki/Q1141160","display_name":"Header","level":2,"score":0.2542000114917755},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.10809","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10809","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.10809","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10809","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Earth":[0,255],"science":[1,256],"datasets":[2,199],"are":[3,201],"growing":[4],"rapidly":[5],"in":[6],"both":[7],"volume":[8],"and":[9,21,30,45,78,92,99,121,174,188,219,249],"structural":[10],"complexity.":[11],"They":[12],"increasingly":[13],"contain":[14],"richly":[15],"labelled":[16],"data":[17,56,66,83,119,132,150,158,163,172,216,221,234],"with":[18,197,236],"heterogeneous":[19,254],"metadata":[20],"complex":[22,82,198],"internal":[23],"constraints":[24],"that":[25,145,200,231],"impose":[26],"dependencies":[27],"between":[28,214],"variables":[29],"dimensions.":[31],"Datacubes":[32],"have":[33],"become":[34],"a":[35,64,108,228,245],"common":[36],"abstraction":[37,165],"for":[38,247],"organising":[39],"such":[40],"datasets,":[41],"but":[42],"traditional":[43,206],"dense":[44],"orthogonal":[46],"datacube":[47],"models":[48,218],"struggle":[49],"to":[50,96,105,203,252],"represent":[51],"irregular,":[52],"sparse":[53],"or":[54],"branching":[55],"spaces":[57],"efficiently.":[58],"In":[59,224],"this":[60,90,136],"paper,":[61],"we":[62,111,138],"introduce":[63],"generalised":[65],"hypercube":[67,164,217],"representation":[68,91],"based":[69],"on":[70,135,148],"compressed":[71,117],"tree":[72,118],"structures,":[73],"which":[74],"enables":[75,169,192],"an":[76,140],"accurate":[77],"compact":[79],"description":[80],"of":[81,89,116,184,195],"spaces.":[84],"We":[85,180],"describe":[86],"the":[87,113,153,162,167,182,185,212],"design":[88],"analyse":[93],"its":[94],"ability":[95],"capture":[97],"sparsity":[98],"conditional":[100],"relationships":[101],"while":[102],"remaining":[103],"efficient":[104,220],"traverse.":[106],"Using":[107],"concrete":[109],"implementation,":[110],"study":[112],"performance":[114,183],"characteristics":[115],"hypercubes":[120,151],"demonstrate":[122],"their":[123],"effectiveness":[124],"as":[125],"fast,":[126],"cache-like":[127],"indices":[128],"over":[129],"large":[130,253],"backend":[131],"stores.":[133],"Building":[134],"representation,":[137],"present":[139],"integrated":[141,186],"feature":[142,237],"extraction":[143,173,238],"system":[144,168,187],"operates":[146],"directly":[147],"tree-based":[149,233],"within":[152],"Polytope":[154],"framework.":[155],"By":[156],"embedding":[157],"access":[159,178,207,222,251],"strategies":[160],"into":[161],"itself,":[166],"precise,":[170],"sub-field":[171],"supports":[175],"flexible,":[176],"user-driven":[177],"patterns.":[179],"evaluate":[181],"show":[189],"how":[190],"it":[191,226],"new":[193],"ways":[194],"interacting":[196],"difficult":[202],"support":[204],"using":[205],"models.":[208],"This":[209],"work":[210],"bridges":[211],"gap":[213],"expressive":[215],"methods.":[223],"particular,":[225],"provides":[227],"unified":[229],"framework":[230],"combines":[232],"representations":[235],"capabilities.":[239],"The":[240],"proposed":[241],"approach":[242],"therefore":[243],"offers":[244],"foundation":[246],"scalable":[248],"user-centric":[250],"datasets.":[257]},"counts_by_year":[],"updated_date":"2026-03-13T14:25:03.468858","created_date":"2026-03-13T00:00:00"}
