{"id":"https://openalex.org/W7152556926","doi":"https://doi.org/10.48550/arxiv.2604.06230","title":"Ontology-based knowledge graph infrastructure for interoperable atomistic simulation data","display_name":"Ontology-based knowledge graph infrastructure for interoperable atomistic simulation data","publication_year":2026,"publication_date":"2026-03-31","ids":{"openalex":"https://openalex.org/W7152556926","doi":"https://doi.org/10.48550/arxiv.2604.06230"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.06230","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06230","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.06230","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033988090","display_name":"Abril Az\u00f3car Guzm\u00e1n","orcid":"https://orcid.org/0000-0001-7564-7990"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Guzman, Abril Azocar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133249935","display_name":"Sarath Menon","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Menon, Sarath","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133292379","display_name":"Tilmann Hickel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hickel, Tilmann","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133257422","display_name":"Stefan Sandfeld","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sandfeld, Stefan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5033988090"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9506999850273132,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9506999850273132,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.024800000712275505,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11784","display_name":"CO2 Reduction Techniques and Catalysts","score":0.0019000000320374966,"subfield":{"id":"https://openalex.org/subfields/2105","display_name":"Renewable Energy, Sustainability and the Environment"},"field":{"id":"https://openalex.org/fields/21","display_name":"Energy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.7889999747276306},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6233000159263611},{"id":"https://openalex.org/keywords/interoperability","display_name":"Interoperability","score":0.5802000164985657},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.474700003862381},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.46230000257492065},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4586000144481659},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.3619999885559082},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.3465999960899353},{"id":"https://openalex.org/keywords/software-versioning","display_name":"Software versioning","score":0.34470000863075256}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.7889999747276306},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7681999802589417},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6233000159263611},{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.5802000164985657},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5285000205039978},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.474700003862381},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.46230000257492065},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4586000144481659},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.40290001034736633},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.382999986410141},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.3619999885559082},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.3465999960899353},{"id":"https://openalex.org/C198140048","wikidata":"https://www.wikidata.org/wiki/Q10859422","display_name":"Software versioning","level":3,"score":0.34470000863075256},{"id":"https://openalex.org/C16311509","wikidata":"https://www.wikidata.org/wiki/Q4148050","display_name":"Dependency graph","level":3,"score":0.34360000491142273},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3165000081062317},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.3091999888420105},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.30059999227523804},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2946999967098236},{"id":"https://openalex.org/C176225458","wikidata":"https://www.wikidata.org/wiki/Q595971","display_name":"Graph database","level":3,"score":0.29429998993873596},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.2937999963760376},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C110903229","wikidata":"https://www.wikidata.org/wiki/Q7449064","display_name":"Semantic integration","level":4,"score":0.27799999713897705},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2777999937534332},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C76518257","wikidata":"https://www.wikidata.org/wiki/Q271680","display_name":"Software framework","level":5,"score":0.27250000834465027},{"id":"https://openalex.org/C174683762","wikidata":"https://www.wikidata.org/wiki/Q609588","display_name":"Component-based software engineering","level":4,"score":0.2687999904155731},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.26249998807907104},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C2984968299","wikidata":"https://www.wikidata.org/wiki/Q1077784","display_name":"Software tool","level":3,"score":0.2615000009536743},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2513999938964844},{"id":"https://openalex.org/C106937863","wikidata":"https://www.wikidata.org/wiki/Q7236518","display_name":"Power graph analysis","level":3,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.06230","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06230","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.06230","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06230","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.6417540907859802}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,41,132],"reuse":[1,157],"of":[2,17,20,66,94,100,105,129,158],"atomistic":[3,34,159],"simulation":[4,35,61,160],"data":[5,36,52,69],"is":[6],"often":[7],"limited":[8],"by":[9],"heterogeneous":[10],"formats,":[11],"incomplete":[12],"metadata,":[13],"and":[14,22,32,58,83,103,126,156],"a":[15,38,47,76,118,148],"lack":[16],"standardized":[18],"representations":[19],"workflows":[21,62,114],"provenance.":[23],"Here":[24],"we":[25],"present":[26],"an":[27],"ontology-based":[28],"infrastructure":[29],"for":[30,151],"representing":[31],"integrating":[33],"as":[37],"knowledge":[39,134],"graph.":[40],"approach":[42],"combines":[43],"domain":[44],"ontologies":[45],"with":[46],"software":[48],"framework":[49,150],"that":[50],"enables":[51],"capture":[53],"both":[54,122],"from":[55,60,70,109],"existing":[56,110],"datasets":[57],"directly":[59],"at":[63],"the":[64,92,153],"point":[65],"generation.":[67],"Heterogeneous":[68],"multiple":[71],"sources":[72],"are":[73,115],"normalized":[74],"into":[75],"common,":[77],"ontology-aligned":[78],"representation,":[79],"enabling":[80,121],"consistent":[81],"querying":[82],"analysis":[84,99],"across":[85],"datasets.":[86],"We":[87],"demonstrate":[88],"these":[89],"capabilities":[90],"through":[91],"integration":[93],"grain":[95],"boundary":[96],"data,":[97],"cross-dataset":[98],"material":[101],"properties,":[102],"extraction":[104],"derived":[106],"thermodynamic":[107],"quantities":[108],"simulations.":[111],"In":[112],"addition,":[113],"represented":[116],"in":[117],"machine-readable":[119],"form,":[120],"forward":[123],"provenance":[124],"tracking":[125],"partial":[127],"reconstruction":[128],"computational":[130,143],"procedures.":[131],"resulting":[133],"graph":[135],"contains":[136],"over":[137],"750,000":[138],"triples":[139],"describing":[140],"nearly":[141],"8,000":[142],"samples.":[144],"This":[145],"work":[146],"provides":[147],"practical":[149],"improving":[152],"findability,":[154],"interoperability,":[155],"data.":[161]},"counts_by_year":[],"updated_date":"2026-04-10T06:07:51.998497","created_date":"2026-04-10T00:00:00"}
