{"id":"https://openalex.org/W4206528075","doi":"https://doi.org/10.1186/s40537-021-00554-3","title":"Addressing big data variety using an automated approach for data characterization","display_name":"Addressing big data variety using an automated approach for data characterization","publication_year":2022,"publication_date":"2022-01-10","ids":{"openalex":"https://openalex.org/W4206528075","doi":"https://doi.org/10.1186/s40537-021-00554-3"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-021-00554-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-021-00554-3","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-021-00554-3","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-021-00554-3","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037801842","display_name":"Georgios Vranopoulos","orcid":"https://orcid.org/0000-0002-2874-6459"},"institutions":[{"id":"https://openalex.org/I897542642","display_name":"University of Plymouth","ror":"https://ror.org/008n7pv89","country_code":"GB","type":"education","lineage":["https://openalex.org/I897542642"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Georgios Vranopoulos","raw_affiliation_strings":["School of Engineering, Computing & Mathematics, University of Plymouth, Plymouth, UK"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Computing & Mathematics, University of Plymouth, Plymouth, UK","institution_ids":["https://openalex.org/I897542642"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029670131","display_name":"Nathan Clarke","orcid":"https://orcid.org/0000-0002-3595-3800"},"institutions":[{"id":"https://openalex.org/I897542642","display_name":"University of Plymouth","ror":"https://ror.org/008n7pv89","country_code":"GB","type":"education","lineage":["https://openalex.org/I897542642"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Nathan Clarke","raw_affiliation_strings":["School of Engineering, Computing & Mathematics, University of Plymouth, Plymouth, UK"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Computing & Mathematics, University of Plymouth, Plymouth, UK","institution_ids":["https://openalex.org/I897542642"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017847728","display_name":"Shirley Atkinson","orcid":"https://orcid.org/0000-0002-7740-4084"},"institutions":[{"id":"https://openalex.org/I897542642","display_name":"University of Plymouth","ror":"https://ror.org/008n7pv89","country_code":"GB","type":"education","lineage":["https://openalex.org/I897542642"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shirley Atkinson","raw_affiliation_strings":["School of Engineering, Computing & Mathematics, University of Plymouth, Plymouth, UK"],"affiliations":[{"raw_affiliation_string":"School of Engineering, Computing & Mathematics, University of Plymouth, Plymouth, UK","institution_ids":["https://openalex.org/I897542642"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5037801842"],"corresponding_institution_ids":["https://openalex.org/I897542642"],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":1.8536,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.86608735,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"9","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13398","display_name":"Data Analysis with R","score":0.944100022315979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8517205119132996},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.7067676782608032},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7067520618438721},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.6615866422653198},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6372748613357544},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.6175462007522583},{"id":"https://openalex.org/keywords/usable","display_name":"USable","score":0.47470182180404663},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.45989492535591125},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.44561177492141724},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.39300957322120667},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25981009006500244},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.20446613430976868}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8517205119132996},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.7067676782608032},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7067520618438721},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.6615866422653198},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6372748613357544},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6175462007522583},{"id":"https://openalex.org/C2780615836","wikidata":"https://www.wikidata.org/wiki/Q2471869","display_name":"USable","level":2,"score":0.47470182180404663},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.45989492535591125},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.44561177492141724},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39300957322120667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25981009006500244},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.20446613430976868},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1186/s40537-021-00554-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-021-00554-3","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-021-00554-3","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:cd0c2c140fa3418ea99ea9f439856ff5","is_oa":true,"landing_page_url":"https://doaj.org/article/cd0c2c140fa3418ea99ea9f439856ff5","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Big Data, Vol 9, Iss 1, Pp 1-28 (2022)","raw_type":"article"},{"id":"pmh:oai:pearl.plymouth.ac.uk:10026.1/18537","is_oa":false,"landing_page_url":"http://hdl.handle.net/10026.1/18537","pdf_url":null,"source":{"id":"https://openalex.org/S4306402507","display_name":"PEARL (University of Plymouth)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I897542642","host_organization_name":"University of Plymouth","host_organization_lineage":["https://openalex.org/I897542642"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1186/s40537-021-00554-3","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-021-00554-3","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-021-00554-3","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5099999904632568}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4206528075.pdf","grobid_xml":"https://content.openalex.org/works/W4206528075.grobid-xml"},"referenced_works_count":16,"referenced_works":["https://openalex.org/W1971330976","https://openalex.org/W1994410669","https://openalex.org/W2013627627","https://openalex.org/W2019880039","https://openalex.org/W2023134523","https://openalex.org/W2030255344","https://openalex.org/W2041588414","https://openalex.org/W2090794497","https://openalex.org/W2159588611","https://openalex.org/W2165093166","https://openalex.org/W2172250756","https://openalex.org/W2385619501","https://openalex.org/W2782986791","https://openalex.org/W2991722393","https://openalex.org/W3197562226","https://openalex.org/W4236737715"],"related_works":["https://openalex.org/W2982321410","https://openalex.org/W2392768766","https://openalex.org/W2058118494","https://openalex.org/W95465806","https://openalex.org/W2392004567","https://openalex.org/W2095118173","https://openalex.org/W2382021449","https://openalex.org/W2046296964","https://openalex.org/W2104269053","https://openalex.org/W2183692821"],"abstract_inverted_index":{"Abstract":[0],"The":[1,85,135,175,201],"creation":[2],"of":[3,14,18,23,44,91,99,108,132,137,154,171,177,194,198,209,223,231,234,251,260,264,276],"new":[4],"knowledge":[5,11],"from":[6],"manipulating":[7],"and":[8,35,50,64,71,79,111,123,161,169,183,185,196,242],"analysing":[9],"existing":[10],"is":[12,47,59,245],"one":[13],"the":[15,24,66,89,106,130,133,138,152,167,172,178,192,199,207,214,221,229,249,258,262,277],"primary":[16],"objectives":[17],"any":[19],"cognitive":[20],"system.":[21],"Most":[22],"effort":[25],"on":[26,88,158,273],"Big":[27,45,83],"Data":[28,156],"research":[29],"has":[30],"been":[31],"focussed":[32],"upon":[33],"Volume":[34],"Velocity":[36],",":[37,40],"while":[38],"Variety":[39,58],"\u201cthe":[41],"ugly":[42],"duckling\u201d":[43],"Data,":[46],"often":[48],"neglected":[49],"difficult":[51],"to":[52,62,141,191,213],"solve.":[53],"A":[54],"principal":[55],"challenge":[56],"with":[57,105,239],"being":[60],"able":[61],"understand":[63],"comprehend":[65],"data.":[67,279],"This":[68],"paper":[69,86],"proposes":[70],"evaluates":[72],"an":[73],"automated":[74],"approach":[75],"for":[76],"metadata":[77,114],"identification":[78,125,160,222],"enrichment":[80],"in":[81,128,189,217,247],"describing":[82],"Data.":[84],"focuses":[87],"use":[90,208,230],"self-learning":[92],"systems":[93],"that":[94,143,228],"will":[95],"enable":[96],"automatic":[97],"compliance":[98],"data":[100,116,121,124,159,173,236,265],"against":[101],"regulatory":[102],"requirements":[103],"along":[104,238],"capability":[107],"generating":[109],"valuable":[110],"readily":[112],"usable":[113],"towards":[115,120,166],"classification.":[117],"Two":[118],"experiments":[119,139],"confidentiality":[122],"were":[126,181],"conducted":[127],"evaluating":[129],"feasibility":[131],"approach.":[134],"focus":[136,153,165,272],"was":[140],"confirm":[142],"repetitive":[144],"manual":[145],"tasks":[146],"can":[147,266],"be":[148,267],"automated,":[149],"thus":[150],"reducing":[151],"a":[155,232,235,270],"Scientist":[157],"thereby":[162],"providing":[163],"more":[164],"extraction":[168],"analysis":[170,241],"itself.":[174],"origin":[176],"datasets":[179],"used":[180],"Private/Business":[182],"Public/Governmental":[184],"exhibited":[186],"diverse":[187],"characteristics":[188],"relation":[190],"number":[193],"files":[195],"size":[197],"files.":[200],"experimental":[202],"work":[203],"confirmed":[204],"that:":[205],"(a)":[206],"algorithmic":[210],"techniques":[211],"attributed":[212],"substantial":[215],"decrease":[216],"false":[218],"positives":[219],"regarding":[220],"confidential":[224],"information;":[225],"(b)":[226],"evidence":[227],"fraction":[233],"set":[237],"statistical":[240],"supervised":[243],"learning":[244],"sufficient":[246],"identifying":[248],"structure":[250],"information":[252],"within":[253],"it.":[254],"With":[255],"this":[256],"approach,":[257],"issues":[259],"understanding":[261],"nature":[263],"mitigated,":[268],"enabling":[269],"greater":[271],"meaningful":[274],"interpretation":[275],"heterogeneous":[278]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":4}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
