{"id":"https://openalex.org/W2995435218","doi":"https://doi.org/10.1109/bigdata.2018.8622360","title":"A Scalable and Robust Framework for Data Stream Ingestion","display_name":"A Scalable and Robust Framework for Data Stream Ingestion","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2995435218","doi":"https://doi.org/10.1109/bigdata.2018.8622360","mag":"2995435218"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2018.8622360","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622360","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1812.04197","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041098845","display_name":"Haruna Isah","orcid":"https://orcid.org/0000-0002-3629-152X"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Haruna Isah","raw_affiliation_strings":["School of Computing, Queen\u2019s University, Kingston, Canada","School of Computing, Queen's University, Kingston, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Queen\u2019s University, Kingston, Canada","institution_ids":["https://openalex.org/I204722609"]},{"raw_affiliation_string":"School of Computing, Queen's University, Kingston, Canada","institution_ids":["https://openalex.org/I204722609"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063480277","display_name":"Farhana Zulkernine","orcid":"https://orcid.org/0000-0002-3326-0875"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Farhana Zulkernine","raw_affiliation_strings":["School of Computing, Queen\u2019s University, Kingston, Canada","School of Computing, Queen's University, Kingston, Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing, Queen\u2019s University, Kingston, Canada","institution_ids":["https://openalex.org/I204722609"]},{"raw_affiliation_string":"School of Computing, Queen's University, Kingston, Canada","institution_ids":["https://openalex.org/I204722609"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.3928,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":{"value":0.95583872,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2900","last_page":"2905"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7983541488647461},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7578266859054565},{"id":"https://openalex.org/keywords/stream-processing","display_name":"Stream processing","score":0.6422861218452454},{"id":"https://openalex.org/keywords/data-stream-mining","display_name":"Data stream mining","score":0.614175021648407},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.49271586537361145},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.4778251647949219},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.44196465611457825},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.42749032378196716},{"id":"https://openalex.org/keywords/data-stream","display_name":"Data stream","score":0.41828081011772156},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.41295087337493896},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.37658485770225525},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.35326695442199707},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3198981285095215}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7983541488647461},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7578266859054565},{"id":"https://openalex.org/C107027933","wikidata":"https://www.wikidata.org/wiki/Q2006448","display_name":"Stream processing","level":2,"score":0.6422861218452454},{"id":"https://openalex.org/C89198739","wikidata":"https://www.wikidata.org/wiki/Q3079880","display_name":"Data stream mining","level":2,"score":0.614175021648407},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.49271586537361145},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.4778251647949219},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.44196465611457825},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.42749032378196716},{"id":"https://openalex.org/C2778484313","wikidata":"https://www.wikidata.org/wiki/Q1172540","display_name":"Data stream","level":2,"score":0.41828081011772156},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.41295087337493896},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.37658485770225525},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.35326695442199707},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3198981285095215},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata.2018.8622360","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622360","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1812.04197","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1812.04197","pdf_url":"https://arxiv.org/pdf/1812.04197","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1812.04197","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1812.04197","pdf_url":"https://arxiv.org/pdf/1812.04197","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.6399999856948853}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2272236827","https://openalex.org/W2294361615","https://openalex.org/W2295727839","https://openalex.org/W2437053200","https://openalex.org/W2515249113","https://openalex.org/W2583699183","https://openalex.org/W2766914983","https://openalex.org/W2769465852","https://openalex.org/W2772043392","https://openalex.org/W2800244123","https://openalex.org/W2804461409","https://openalex.org/W3025270055","https://openalex.org/W6697187514","https://openalex.org/W6732956039"],"related_works":["https://openalex.org/W4389449520","https://openalex.org/W2908411463","https://openalex.org/W127192698","https://openalex.org/W2570600173","https://openalex.org/W2893008024","https://openalex.org/W2743735673","https://openalex.org/W2886490431","https://openalex.org/W4361801939","https://openalex.org/W2360131081","https://openalex.org/W180351855"],"abstract_inverted_index":{"An":[0],"essential":[1],"part":[2],"of":[3,17,26,39,84,132,148,151,178,192],"building":[4],"a":[5,36,44,158,172,185,196],"data-driven":[6],"organization":[7],"is":[8,101],"the":[9,30,129,142,146,149,190,193,217],"ability":[10],"to":[11,19,35,90,103],"handle":[12],"and":[13,29,80,106,113,124,134,145,160,165,180,188,207,225],"process":[14],"continuous":[15,45],"streams":[16,109],"data":[18,40,48,63,85,91,108,153,162,183,198,233],"discover":[20],"actionable":[21],"insights.":[22],"The":[23,77,219],"explosive":[24],"growth":[25],"interconnected":[27],"devices":[28],"social":[31],"Web":[32],"has":[33],"led":[34],"large":[37],"volume":[38,79],"being":[41],"generated":[42],"on":[43],"basis.":[46],"Streaming":[47],"sources":[49,112],"such":[50,96],"as":[51,171],"stock":[52],"quotes,":[53],"credit":[54],"card":[55],"transactions,":[56],"trending":[57],"news,":[58],"traffic":[59],"conditions,":[60],"time-sensitive":[61],"patient's":[62],"are":[64],"not":[65,74],"only":[66],"very":[67],"common":[68],"but":[69,98],"can":[70,127,169],"rapidly":[71],"depreciate":[72],"if":[73],"processed":[75],"quickly.":[76],"ever-increasing":[78],"highly":[81],"irregular":[82],"nature":[83],"rates":[86],"pose":[87],"new":[88,122],"challenges":[89,120],"stream":[92,154,163,199,234],"processing":[93,200,210,235],"systems.":[94],"One":[95],"challenging":[97],"important":[99],"task":[100],"how":[102],"accurately":[104],"ingest":[105],"integrate":[107],"from":[110,215],"various":[111],"locations":[114],"into":[115],"an":[116],"analytics":[117],"platform.":[118],"These":[119],"demand":[121],"strategies":[123],"systems":[125],"that":[126,168,203],"offer":[128],"desired":[130],"degree":[131],"scalability":[133],"robustness":[135],"in":[136,184,195,230],"handling":[137],"failures.":[138],"This":[139],"paper":[140],"investigates":[141],"fundamental":[143],"requirements":[144],"state":[147],"art":[150],"existing":[152],"ingestion":[155,164],"systems,":[156],"propose":[157],"scalable":[159],"fault-tolerant":[161],"integration":[166],"framework":[167,194],"serve":[170],"reusable":[173],"component":[174],"across":[175,216],"many":[176],"feeds":[177],"structured":[179],"unstructured":[181],"input":[182],"given":[186],"platform,":[187],"demonstrate":[189],"utility":[191],"real-world":[197],"case":[201],"study":[202,220],"integrates":[204],"Apache":[205],"NiFi":[206],"Kafka":[208],"for":[209,227],"high":[211],"velocity":[212],"news":[213],"articles":[214],"globe.":[218],"also":[221],"identifies":[222],"best":[223],"practices":[224],"gaps":[226],"future":[228],"research":[229],"developing":[231],"large-scale":[232],"infrastructure.":[236]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":18},{"year":2019,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
