{"id":"https://openalex.org/W2912018214","doi":"https://doi.org/10.1109/bigdata.2018.8622171","title":"Scalable Construction of Text Indexes with Thrill","display_name":"Scalable Construction of Text Indexes with Thrill","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2912018214","doi":"https://doi.org/10.1109/bigdata.2018.8622171","mag":"2912018214"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2018.8622171","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622171","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5445/ir/1000097631","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042928114","display_name":"Timo Bingmann","orcid":"https://orcid.org/0000-0003-0529-0097"},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Timo Bingmann","raw_affiliation_strings":["Institute of Theoretical Informatics, Karlsruhe Institute of Technology, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Theoretical Informatics, Karlsruhe Institute of Technology, Germany","institution_ids":["https://openalex.org/I102335020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061901128","display_name":"Simon Gog","orcid":"https://orcid.org/0000-0002-5450-8630"},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Simon Gog","raw_affiliation_strings":["Institute of Theoretical Informatics, Karlsruhe Institute of Technology, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Theoretical Informatics, Karlsruhe Institute of Technology, Germany","institution_ids":["https://openalex.org/I102335020"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042131337","display_name":"Florian Kurpicz","orcid":"https://orcid.org/0000-0002-2379-9455"},"institutions":[{"id":"https://openalex.org/I200332995","display_name":"TU Dortmund University","ror":"https://ror.org/01k97gp34","country_code":"DE","type":"education","lineage":["https://openalex.org/I200332995"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Florian Kurpicz","raw_affiliation_strings":["Department of Computer Science, Technische Universit\u00e4t Dortmund, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Technische Universit\u00e4t Dortmund, Germany","institution_ids":["https://openalex.org/I200332995"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5042928114"],"corresponding_institution_ids":["https://openalex.org/I102335020"],"apc_list":null,"apc_paid":null,"fwci":0.1629,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.61594085,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"2719","issue":null,"first_page":"634","last_page":"643"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12029","display_name":"DNA and Biological Computing","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8484416007995605},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7943911552429199},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.7068495154380798},{"id":"https://openalex.org/keywords/suffix-array","display_name":"Suffix array","score":0.6867175102233887},{"id":"https://openalex.org/keywords/compressed-suffix-array","display_name":"Compressed suffix array","score":0.6631593704223633},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.622896671295166},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.5759716629981995},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5071241855621338},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.48293420672416687},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.4543664753437042},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4520178437232971},{"id":"https://openalex.org/keywords/out-of-core-algorithm","display_name":"Out-of-core algorithm","score":0.41685181856155396},{"id":"https://openalex.org/keywords/suffix-tree","display_name":"Suffix tree","score":0.34042736887931824},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3231264352798462},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.27915826439857483},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1967250406742096},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16602247953414917},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.08457988500595093},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07436856627464294}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8484416007995605},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7943911552429199},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.7068495154380798},{"id":"https://openalex.org/C2779259728","wikidata":"https://www.wikidata.org/wiki/Q281472","display_name":"Suffix array","level":3,"score":0.6867175102233887},{"id":"https://openalex.org/C100903775","wikidata":"https://www.wikidata.org/wiki/Q5157028","display_name":"Compressed suffix array","level":4,"score":0.6631593704223633},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.622896671295166},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.5759716629981995},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5071241855621338},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.48293420672416687},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.4543664753437042},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4520178437232971},{"id":"https://openalex.org/C79470037","wikidata":"https://www.wikidata.org/wiki/Q279748","display_name":"Out-of-core algorithm","level":2,"score":0.41685181856155396},{"id":"https://openalex.org/C2781166958","wikidata":"https://www.wikidata.org/wiki/Q1426863","display_name":"Suffix tree","level":3,"score":0.34042736887931824},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3231264352798462},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27915826439857483},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1967250406742096},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16602247953414917},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.08457988500595093},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07436856627464294},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/bigdata.2018.8622171","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622171","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:EVASTAR-Karlsruhe.de:1000097631","is_oa":false,"landing_page_url":"https://publikationen.bibliothek.kit.edu/1000097631","pdf_url":null,"source":{"id":"https://openalex.org/S4306401992","display_name":"Repository KITopen (Karlsruhe Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I102335020","host_organization_name":"Karlsruhe Institute of Technology","host_organization_lineage":["https://openalex.org/I102335020"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"doc-type:conferenceObject"},{"id":"doi:10.5445/ir/1000097631","is_oa":true,"landing_page_url":"https://doi.org/10.5445/ir/1000097631","pdf_url":null,"source":{"id":"https://openalex.org/S7407052948","display_name":"KITopen","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.5445/ir/1000097631","is_oa":true,"landing_page_url":"https://doi.org/10.5445/ir/1000097631","pdf_url":null,"source":{"id":"https://openalex.org/S7407052948","display_name":"KITopen","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.49000000953674316}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W138284838","https://openalex.org/W1425731158","https://openalex.org/W1573714593","https://openalex.org/W1726445723","https://openalex.org/W1976682045","https://openalex.org/W1993399552","https://openalex.org/W2030243808","https://openalex.org/W2032574053","https://openalex.org/W2067974452","https://openalex.org/W2077333702","https://openalex.org/W2097199722","https://openalex.org/W2111217142","https://openalex.org/W2130564474","https://openalex.org/W2139749722","https://openalex.org/W2150749717","https://openalex.org/W2158874082","https://openalex.org/W2159707167","https://openalex.org/W2295072202","https://openalex.org/W2400487982","https://openalex.org/W2508838500","https://openalex.org/W2519490418","https://openalex.org/W2570663628","https://openalex.org/W2604604482","https://openalex.org/W2886405845","https://openalex.org/W4234803039","https://openalex.org/W6628445530","https://openalex.org/W6674629224","https://openalex.org/W6697410683","https://openalex.org/W6725708617","https://openalex.org/W6753874770"],"related_works":["https://openalex.org/W2003608043","https://openalex.org/W153119118","https://openalex.org/W2583658747","https://openalex.org/W2405892617","https://openalex.org/W2158874082","https://openalex.org/W4293529780","https://openalex.org/W1575722140","https://openalex.org/W2073921136","https://openalex.org/W2055795184","https://openalex.org/W2123393931"],"abstract_inverted_index":{"The":[0],"suffix":[1,33,58],"array":[2,34,59],"is":[3],"the":[4,27,63],"key":[5],"to":[6,38,41],"efficient":[7],"solutions":[8],"for":[9],"myriads":[10],"of":[11,30,76,84],"string":[12],"processing":[13,69,75],"problems":[14],"in":[15,82],"different":[16],"application":[17],"domains,":[18],"like":[19],"data":[20,22,67],"compression,":[21],"mining,":[23],"or":[24],"bioinformatics.":[25],"With":[26],"rapid":[28],"growth":[29],"available":[31],"data,":[32],"construction":[35,60],"algorithms":[36,61],"have":[37,87],"be":[39],"adapted":[40],"advanced":[42],"computational":[43],"models":[44],"such":[45],"as":[46],"external":[47],"memory":[48],"and":[49],"distributed":[50,80],"computing.":[51],"In":[52],"this":[53],"article,":[54],"we":[55],"present":[56],"five":[57],"utilizing":[62],"new":[64],"algorithmic":[65],"big":[66],"batch":[68],"framework":[70],"Thrill,":[71],"which":[72],"allows":[73],"scalable":[74],"input":[77],"sizes":[78],"on":[79],"systems":[81],"orders":[83],"magnitude":[85],"that":[86],"not":[88],"been":[89],"considered":[90],"before.":[91]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
