{"id":"https://openalex.org/W4387091766","doi":"https://doi.org/10.1155/2023/6900908","title":"Big Data Analytics for the ATLAS EventIndex Project with Apache Spark","display_name":"Big Data Analytics for the ATLAS EventIndex Project with Apache Spark","publication_year":2023,"publication_date":"2023-09-27","ids":{"openalex":"https://openalex.org/W4387091766","doi":"https://doi.org/10.1155/2023/6900908"},"language":"en","primary_location":{"id":"doi:10.1155/2023/6900908","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2023/6900908","pdf_url":"https://downloads.hindawi.com/journals/cmm/2023/6900908.pdf","source":{"id":"https://openalex.org/S4210240678","display_name":"Computational and Mathematical Methods","issn_l":"2577-7408","issn":["2577-7408"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational and Mathematical Methods","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://downloads.hindawi.com/journals/cmm/2023/6900908.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043539171","display_name":"\u00c1lvaro Fern\u00e1ndez Casan\u00ed","orcid":"https://orcid.org/0000-0003-1394-509X"},"institutions":[{"id":"https://openalex.org/I4210101901","display_name":"Instituto de F\u00edsica Corpuscular","ror":"https://ror.org/017xch102","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I16097986","https://openalex.org/I4210101901"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"\u00c1lvaro Fern\u00e1ndez Casan\u00ed","raw_affiliation_strings":["Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain"],"affiliations":[{"raw_affiliation_string":"Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain","institution_ids":["https://openalex.org/I4210101901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074160158","display_name":"Carlos Garc\u00eda Montoro","orcid":null},"institutions":[{"id":"https://openalex.org/I4210101901","display_name":"Instituto de F\u00edsica Corpuscular","ror":"https://ror.org/017xch102","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I16097986","https://openalex.org/I4210101901"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Carlos Garc\u00eda Montoro","raw_affiliation_strings":["Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain"],"affiliations":[{"raw_affiliation_string":"Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain","institution_ids":["https://openalex.org/I4210101901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090992962","display_name":"S. Gonz\u00e1lez de la Hoz","orcid":"https://orcid.org/0000-0001-5304-5390"},"institutions":[{"id":"https://openalex.org/I4210101901","display_name":"Instituto de F\u00edsica Corpuscular","ror":"https://ror.org/017xch102","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I16097986","https://openalex.org/I4210101901"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Santiago Gonz\u00e1lez de la Hoz","raw_affiliation_strings":["Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain"],"affiliations":[{"raw_affiliation_string":"Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain","institution_ids":["https://openalex.org/I4210101901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120645134","display_name":"J. Salt","orcid":"https://orcid.org/0000-0003-4181-2788"},"institutions":[{"id":"https://openalex.org/I4210101901","display_name":"Instituto de F\u00edsica Corpuscular","ror":"https://ror.org/017xch102","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I16097986","https://openalex.org/I4210101901"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jos\u00e9 Salt","raw_affiliation_strings":["Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain"],"affiliations":[{"raw_affiliation_string":"Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain","institution_ids":["https://openalex.org/I4210101901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100778500","display_name":"J. S\u00e1nchez","orcid":"https://orcid.org/0000-0001-9913-310X"},"institutions":[{"id":"https://openalex.org/I4210101901","display_name":"Instituto de F\u00edsica Corpuscular","ror":"https://ror.org/017xch102","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I16097986","https://openalex.org/I4210101901"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Javier S\u00e1nchez","raw_affiliation_strings":["Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain"],"affiliations":[{"raw_affiliation_string":"Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain","institution_ids":["https://openalex.org/I4210101901"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5105978135","display_name":"M. Villaplana Perez","orcid":"https://orcid.org/0000-0002-0048-4602"},"institutions":[{"id":"https://openalex.org/I4210101901","display_name":"Instituto de F\u00edsica Corpuscular","ror":"https://ror.org/017xch102","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I16097986","https://openalex.org/I4210101901"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Miguel Villaplana P\u00e9rez","raw_affiliation_strings":["Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain"],"affiliations":[{"raw_affiliation_string":"Institute of Corpuscular Physics-IFIC (CSIC/UV), E-46980 Paterna, Spain","institution_ids":["https://openalex.org/I4210101901"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5043539171"],"corresponding_institution_ids":["https://openalex.org/I4210101901"],"apc_list":{"value":750,"currency":"USD","value_usd":750},"apc_paid":{"value":750,"currency":"USD","value_usd":750},"fwci":0.1997,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.51598333,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"2023","issue":null,"first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7586520910263062},{"id":"https://openalex.org/keywords/large-hadron-collider","display_name":"Large Hadron Collider","score":0.6303894519805908},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.6246811151504517},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.6181203722953796},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.5905181765556335},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.5020060539245605},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.4960251748561859},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2962731719017029}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7586520910263062},{"id":"https://openalex.org/C87668248","wikidata":"https://www.wikidata.org/wiki/Q40605","display_name":"Large Hadron Collider","level":2,"score":0.6303894519805908},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.6246811151504517},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.6181203722953796},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.5905181765556335},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5020060539245605},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.4960251748561859},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2962731719017029},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1155/2023/6900908","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2023/6900908","pdf_url":"https://downloads.hindawi.com/journals/cmm/2023/6900908.pdf","source":{"id":"https://openalex.org/S4210240678","display_name":"Computational and Mathematical Methods","issn_l":"2577-7408","issn":["2577-7408"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational and Mathematical Methods","raw_type":"journal-article"},{"id":"pmh:oai:digital.csic.es:10261/362567","is_oa":true,"landing_page_url":"http://hdl.handle.net/10261/362567","pdf_url":null,"source":{"id":"https://openalex.org/S4306400616","display_name":"DIGITAL.CSIC (Spanish National Research Council (CSIC))","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I134820265","host_organization_name":"Consejo Superior de Investigaciones Cient\u00edficas","host_organization_lineage":["https://openalex.org/I134820265"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"art\u00edculo"},{"id":"pmh:oai:doaj.org/article:9e2e9eacf95a481ea05bb0adfdd21529","is_oa":true,"landing_page_url":"https://doaj.org/article/9e2e9eacf95a481ea05bb0adfdd21529","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computational and Mathematical Methods, Vol 2023 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1155/2023/6900908","is_oa":true,"landing_page_url":"https://doi.org/10.1155/2023/6900908","pdf_url":"https://downloads.hindawi.com/journals/cmm/2023/6900908.pdf","source":{"id":"https://openalex.org/S4210240678","display_name":"Computational and Mathematical Methods","issn_l":"2577-7408","issn":["2577-7408"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational and Mathematical Methods","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.6399999856948853,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4387091766.pdf"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W174317439","https://openalex.org/W2105947650","https://openalex.org/W2119738171","https://openalex.org/W2173213060","https://openalex.org/W2242261858","https://openalex.org/W2325870937","https://openalex.org/W2327481811","https://openalex.org/W2542459869","https://openalex.org/W2583868991","https://openalex.org/W2945217737","https://openalex.org/W3022870279","https://openalex.org/W3048946700","https://openalex.org/W4206932046","https://openalex.org/W4211046286","https://openalex.org/W4232091795","https://openalex.org/W4360980786","https://openalex.org/W4387091766"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2392768766","https://openalex.org/W2058118494","https://openalex.org/W2095118173","https://openalex.org/W2766461310","https://openalex.org/W4247566972","https://openalex.org/W4388692845","https://openalex.org/W3202731209","https://openalex.org/W3211874991","https://openalex.org/W3191926225"],"abstract_inverted_index":{"The":[0,41,250],"ATLAS":[1,16],"EventIndex":[2,155,190],"was":[3],"designed":[4],"to":[5,68,93,235,238,265],"provide":[6,94],"a":[7,138,242],"global":[8],"event":[9],"catalogue":[10],"and":[11,24,28,34,52,87,108,111,143,209,233,244,268],"limited":[12],"event-level":[13],"metadata":[14],"for":[15,104,125,147,197,211],"experiment":[17],"of":[18,74,115,153,188,199,214,217],"the":[19,58,72,97,101,127,132,149,154,182,189,195,202,212,239,257,272],"Large":[20],"Hadron":[21],"Collider":[22],"(LHC)":[23],"their":[25],"analysis":[26],"groups":[27],"users":[29],"during":[30],"Run":[31,43,75],"2":[32],"(2015-2018)":[33],"has":[35,48],"been":[36],"running":[37],"in":[38,46,85,91,145,158,241],"production":[39,54],"since.":[40],"LHC":[42],"3,":[44],"started":[45],"2022,":[47],"seen":[49],"increased":[50,105],"data-taking":[51],"simulation":[53],"rates,":[55],"with":[56,112],"which":[57,261],"current":[59,102],"infrastructure":[60],"would":[61],"still":[62],"cope":[63],"but":[64],"may":[65],"be":[66,177,227],"stretched":[67],"its":[69],"limits":[70],"by":[71,229],"end":[73],"3.":[76],"A":[77],"new":[78,120,133,139],"core":[79],"storage":[80],"service":[81],"is":[82,89],"being":[83,123],"developed":[84,124],"HBase/Phoenix,":[86],"there":[88],"work":[90],"progress":[92],"at":[95,169],"least":[96],"same":[98,183,203],"functionality":[99],"as":[100],"one":[103],"data":[106,151,166,240,254,259],"ingestion":[107],"search":[109,196],"rates":[110],"increasing":[113],"volumes":[114],"stored":[116,157],"data.":[117],"In":[118],"addition,":[119],"tools":[121,232,252],"are":[122,192,222],"solving":[126],"needed":[128],"access":[129,237,255],"cases":[130,187],"within":[131,181],"storage.":[134,274],"This":[135],"paper":[136],"describes":[137],"tool":[140],"using":[141,247],"Spark":[142,173,248],"implemented":[144],"Scala":[146],"accessing":[148],"big":[150],"quantities":[152],"project":[156,191],"HBase/Phoenix.":[159],"With":[160],"this":[161],"tool,":[162],"we":[163],"can":[164,176,226],"offer":[165],"discovery":[167],"capabilities":[168],"different":[170,205,220],"granularities,":[171],"providing":[172],"Dataframes":[174],"that":[175],"used":[178,228],"or":[179,204],"refined":[180],"framework.":[184],"Data":[185],"analytic":[186],"implemented,":[193],"like":[194],"duplicates":[198],"events":[200,218],"from":[201,256],"datasets.":[206],"An":[207],"algorithm":[208],"implementation":[210],"calculation":[213],"overlap":[215],"matrices":[216],"across":[219],"datasets":[221],"presented.":[223],"Our":[224],"approach":[225],"other":[230],"higher-level":[231],"users,":[234],"ease":[236],"performant":[243],"standard":[245],"way":[246],"abstractions.":[249],"provided":[251],"decouple":[253],"actual":[258],"schema,":[260],"makes":[262],"it":[263],"convenient":[264],"hide":[266],"complexity":[267],"possible":[269],"changes":[270],"on":[271],"backed":[273]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2023-09-28T00:00:00"}
