{"id":"https://openalex.org/W2211954543","doi":"https://doi.org/10.1109/bigdata.2015.7363872","title":"Record-aware compression for big textual data analysis acceleration","display_name":"Record-aware compression for big textual data analysis acceleration","publication_year":2015,"publication_date":"2015-10-01","ids":{"openalex":"https://openalex.org/W2211954543","doi":"https://doi.org/10.1109/bigdata.2015.7363872","mag":"2211954543"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2015.7363872","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7363872","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054788422","display_name":"Dapeng Dong","orcid":"https://orcid.org/0000-0001-8545-8931"},"institutions":[{"id":"https://openalex.org/I27577105","display_name":"University College Cork","ror":"https://ror.org/03265fv13","country_code":"IE","type":"education","lineage":["https://openalex.org/I27577105"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Dapeng Dong","raw_affiliation_strings":["Mobile and Internet Systems Laboratory, University College Cork, Ireland"],"affiliations":[{"raw_affiliation_string":"Mobile and Internet Systems Laboratory, University College Cork, Ireland","institution_ids":["https://openalex.org/I27577105"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112150566","display_name":"John J. Herbert","orcid":null},"institutions":[{"id":"https://openalex.org/I27577105","display_name":"University College Cork","ror":"https://ror.org/03265fv13","country_code":"IE","type":"education","lineage":["https://openalex.org/I27577105"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"John Herbert","raw_affiliation_strings":["Mobile and Internet Systems Laboratory, University College Cork, Ireland"],"affiliations":[{"raw_affiliation_string":"Mobile and Internet Systems Laboratory, University College Cork, Ireland","institution_ids":["https://openalex.org/I27577105"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5054788422"],"corresponding_institution_ids":["https://openalex.org/I27577105"],"apc_list":null,"apc_paid":null,"fwci":2.5886,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.91648534,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1183","last_page":"1190"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8568186163902283},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.8029930591583252},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.7093045115470886},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5756213068962097},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5278696417808533},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.5091981291770935},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.4897620677947998},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.47717776894569397},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.4572874903678894},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.42796093225479126},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.41068035364151},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.41026678681373596},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.38671553134918213},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3191717565059662},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.24759653210639954},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10323584079742432}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8568186163902283},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.8029930591583252},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.7093045115470886},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5756213068962097},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5278696417808533},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.5091981291770935},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4897620677947998},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.47717776894569397},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.4572874903678894},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.42796093225479126},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.41068035364151},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.41026678681373596},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.38671553134918213},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3191717565059662},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.24759653210639954},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10323584079742432},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2015.7363872","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7363872","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.6299999952316284}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320838","display_name":"Higher Education Authority","ror":"https://ror.org/0471xye93"},{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W38739846","https://openalex.org/W1875619160","https://openalex.org/W1972106352","https://openalex.org/W1990653637","https://openalex.org/W2041404167","https://openalex.org/W2057420573","https://openalex.org/W2060108852","https://openalex.org/W2061873838","https://openalex.org/W2095977162","https://openalex.org/W2107745473","https://openalex.org/W2122962290","https://openalex.org/W2127492100","https://openalex.org/W2155729921","https://openalex.org/W2160484851","https://openalex.org/W2161488606","https://openalex.org/W2165564574","https://openalex.org/W2277359368","https://openalex.org/W2993383518","https://openalex.org/W6601528862","https://openalex.org/W6648982606","https://openalex.org/W6694713699"],"related_works":["https://openalex.org/W3024364549","https://openalex.org/W4206019083","https://openalex.org/W2048865712","https://openalex.org/W1556451512","https://openalex.org/W1976265003","https://openalex.org/W2370378377","https://openalex.org/W1555349535","https://openalex.org/W4237510188","https://openalex.org/W2130160813","https://openalex.org/W2054476758"],"abstract_inverted_index":{"Big":[0],"data":[1,13,17,40,45,54,65,79,87,117,163],"analysis":[2,157],"technologies":[3],"are":[4],"becoming":[5],"more":[6],"widely":[7],"used":[8],"in":[9],"industry.":[10],"The":[11,148],"ever-increasing":[12],"volume,":[14],"however,":[15],"puts":[16],"analytic":[18],"platforms":[19],"such":[20,142],"as":[21,58,120,122,143],"Hadoop":[22,35,51,109],"under":[23],"constant":[24],"pressure.":[25],"Several":[26],"compression":[27],"methods":[28],"have":[29],"been":[30],"made":[31],"available":[32],"on":[33,156],"the":[34,50,68,74,86,95,102,114],"platform":[36],"to":[37,85,161],"effectively":[38],"reduce":[39],"size":[41,164],"and":[42,112,116,146,159],"efficiently":[43],"deliver":[44],"between":[46],"cluster":[47],"nodes.":[48],"In":[49,72],"context,":[52],"compressed":[53,103],"can":[55],"be":[56],"categorized":[57],"splittable":[59,78],"or":[60],"non-splittable.":[61],"Working":[62],"with":[63,67,134],"non-splittable":[64],"conflicts":[66],"goal":[69],"of":[70,77,130,137],"parallelism.":[71],"addition,":[73],"current":[75],"realization":[76],"by":[80],"indexing":[81],"is":[82],"potentially":[83],"harmful":[84],"locality":[88,118],"property.":[89],"To":[90],"this":[91],"end,":[92],"we":[93],"introduce":[94],"Record-aware":[96],"Compression":[97],"(RaC)":[98],"scheme":[99],"that":[100],"makes":[101],"contents":[104],"splittable,":[105],"uses":[106],"a":[107,128,135],"lightweight":[108],"Record":[110],"Reader,":[111],"preserves":[113],"parallelism":[115],"properties":[119],"much":[121],"possible.":[123],"We":[124],"evaluate":[125],"RaC":[126],"using":[127],"set":[129],"classical":[131],"MapReduce":[132],"jobs":[133],"collection":[136],"well-known":[138],"datasets":[139],"from":[140],"companies":[141],"Google,":[144],"Yahoo!,":[145],"Amazon.":[147],"experimental":[149],"results":[150],"show":[151],"an":[152],"average":[153],"24%":[154],"improvement":[155],"performance":[158],"up":[160],"75%":[162],"reduction.":[165]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
