{"id":"https://openalex.org/W4406458148","doi":"https://doi.org/10.1109/bigdata62323.2024.10825877","title":"Decoding the Granular Puzzle of Macromolecules: Efficient 3D Protein Structure Alignment in the Age of Big Data with Apache Spark","display_name":"Decoding the Granular Puzzle of Macromolecules: Efficient 3D Protein Structure Alignment in the Age of Big Data with Apache Spark","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406458148","doi":"https://doi.org/10.1109/bigdata62323.2024.10825877"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825877","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825877","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004555174","display_name":"Dariusz Mrozek","orcid":"https://orcid.org/0000-0001-6764-6656"},"institutions":[{"id":"https://openalex.org/I119004910","display_name":"Silesian University of Technology","ror":"https://ror.org/02dyjk442","country_code":"PL","type":"education","lineage":["https://openalex.org/I119004910"]}],"countries":["PL"],"is_corresponding":true,"raw_author_name":"Bo\u017cena Ma\u0142ysiak-Mrozek","raw_affiliation_strings":["Silesian University of Technology,Department of Distributed Systems and Informatic Devices,Gliwice,Poland"],"affiliations":[{"raw_affiliation_string":"Silesian University of Technology,Department of Distributed Systems and Informatic Devices,Gliwice,Poland","institution_ids":["https://openalex.org/I119004910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115904479","display_name":"Paulina Paw\u0142owicz","orcid":null},"institutions":[{"id":"https://openalex.org/I119004910","display_name":"Silesian University of Technology","ror":"https://ror.org/02dyjk442","country_code":"PL","type":"education","lineage":["https://openalex.org/I119004910"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Paulina Paw\u0142owicz","raw_affiliation_strings":["Silesian University of Technology,Department of Applied Informatics,Gliwice,Poland"],"affiliations":[{"raw_affiliation_string":"Silesian University of Technology,Department of Applied Informatics,Gliwice,Poland","institution_ids":["https://openalex.org/I119004910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002623265","display_name":"Vaidy Sunderam","orcid":"https://orcid.org/0000-0002-5128-7852"},"institutions":[{"id":"https://openalex.org/I150468666","display_name":"Emory University","ror":"https://ror.org/03czfpz43","country_code":"US","type":"education","lineage":["https://openalex.org/I150468666"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vaidy Sunderam","raw_affiliation_strings":["Emory University,Department of Computer Science,Atlanta,GA,USA"],"affiliations":[{"raw_affiliation_string":"Emory University,Department of Computer Science,Atlanta,GA,USA","institution_ids":["https://openalex.org/I150468666"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003302169","display_name":"Che\u2010Lun Hung","orcid":"https://orcid.org/0000-0002-8906-9367"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Che-Lun Hung","raw_affiliation_strings":["National Yang Ming Chiao Tung University,Institute of Biomedical Informatics,Taipei City,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University,Institute of Biomedical Informatics,Taipei City,Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010693606","display_name":"Andrzej Kwiecie\u0144","orcid":"https://orcid.org/0000-0003-1447-3303"},"institutions":[{"id":"https://openalex.org/I119004910","display_name":"Silesian University of Technology","ror":"https://ror.org/02dyjk442","country_code":"PL","type":"education","lineage":["https://openalex.org/I119004910"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Andrzej Kwiecie\u0144","raw_affiliation_strings":["Silesian University of Technology,Department of Distributed Systems and Informatic Devices,Gliwice,Poland"],"affiliations":[{"raw_affiliation_string":"Silesian University of Technology,Department of Distributed Systems and Informatic Devices,Gliwice,Poland","institution_ids":["https://openalex.org/I119004910"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004555174","display_name":"Dariusz Mrozek","orcid":"https://orcid.org/0000-0001-6764-6656"},"institutions":[{"id":"https://openalex.org/I119004910","display_name":"Silesian University of Technology","ror":"https://ror.org/02dyjk442","country_code":"PL","type":"education","lineage":["https://openalex.org/I119004910"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Dariusz Mrozek","raw_affiliation_strings":["Silesian University of Technology,Department of Applied Informatics,Gliwice,Poland"],"affiliations":[{"raw_affiliation_string":"Silesian University of Technology,Department of Applied Informatics,Gliwice,Poland","institution_ids":["https://openalex.org/I119004910"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5004555174"],"corresponding_institution_ids":["https://openalex.org/I119004910"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19218176,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"8317","last_page":"8324"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13937","display_name":"Genetics, Bioinformatics, and Biomedical Research","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.8789582252502441},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.7933213710784912},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.54941326379776},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5480991005897522},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.1459287405014038},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.13739269971847534},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.0963461697101593}],"concepts":[{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.8789582252502441},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.7933213710784912},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.54941326379776},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5480991005897522},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.1459287405014038},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.13739269971847534},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0963461697101593}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825877","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825877","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320324727","display_name":"Silesian University of Technology","ror":"https://ror.org/02dyjk442"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2037963718","https://openalex.org/W2045268887","https://openalex.org/W2091506983","https://openalex.org/W2097642323","https://openalex.org/W2105803236","https://openalex.org/W2121184378","https://openalex.org/W2126341972","https://openalex.org/W2130479394","https://openalex.org/W2132885928","https://openalex.org/W2137955461","https://openalex.org/W2139918973","https://openalex.org/W2152326664","https://openalex.org/W2165025542","https://openalex.org/W2281326002","https://openalex.org/W2557876094","https://openalex.org/W2780263532","https://openalex.org/W2883854517","https://openalex.org/W2900134604","https://openalex.org/W2907280276","https://openalex.org/W2972411752","https://openalex.org/W3090813340","https://openalex.org/W3212133368","https://openalex.org/W4210851756","https://openalex.org/W4244145013","https://openalex.org/W4387341993"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4390608645","https://openalex.org/W4405901645","https://openalex.org/W4394895745","https://openalex.org/W2766461310","https://openalex.org/W4247566972","https://openalex.org/W3202731209","https://openalex.org/W3211874991"],"abstract_inverted_index":{"Proteins":[0],"are":[1,23],"complex":[2],"biological":[3,29],"information":[4],"granules":[5],"that":[6,111,129],"play":[7],"a":[8,108],"crucial":[9],"role":[10],"in":[11],"various":[12],"cellular":[13],"processes":[14],"within":[15,71,116],"living":[16],"organisms.":[17],"Processing":[18],"3D":[19,41,96],"protein":[20,42,47,72,150],"structures,":[21],"which":[22],"the":[24,28,100,117,122,135,146],"most":[25],"informative":[26],"from":[27],"point":[30],"of":[31,82,94,149],"view,":[32],"is":[33,139],"both":[34,140],"intricate":[35],"and":[36,52,68,74,142],"time-consuming.":[37],"In":[38,86],"particular,":[39],"performing":[40],"structure":[43,151],"searches":[44],"against":[45],"large":[46,80],"datasets":[48],"involves":[49],"identifying":[50],"similarities":[51],"conducting":[53],"structural":[54,97],"alignments":[55],"across":[56],"numerous":[57],"molecules":[58],"(granules).":[59],"This":[60],"task":[61],"demands":[62],"advanced":[63],"methods":[64],"for":[65,121],"matching":[66],"identical":[67],"similar":[69],"regions":[70],"structures":[73],"substantial":[75],"computational":[76],"resources":[77],"to":[78],"handle":[79],"collections":[81],"macromolecular":[83],"data":[84,104,114,118],"efficiently.":[85],"this":[87,130],"paper,":[88],"we":[89],"present":[90],"our":[91],"parallel":[92],"implementation":[93],"scalable":[95],"alignment":[98,123],"on":[99],"Apache":[101],"Spark":[102,113,136],"big":[103],"platform.":[105],"We":[106],"describe":[107],"customized":[109],"approach":[110],"leverages":[112],"transformations":[115],"processing":[119,137],"pipeline":[120],"process.":[124],"Our":[125],"experimental":[126],"results":[127],"demonstrate":[128],"solution,":[131],"tightly":[132],"integrated":[133],"with":[134,145],"model,":[138],"efficient":[141],"scalable,":[143],"even":[144],"increasing":[147],"volume":[148],"data.":[152]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
