{"id":"https://openalex.org/W2214582355","doi":"https://doi.org/10.1109/bigdata.2015.7363879","title":"How valuable is your data? A quantitative approach using data mining","display_name":"How valuable is your data? A quantitative approach using data mining","publication_year":2015,"publication_date":"2015-10-01","ids":{"openalex":"https://openalex.org/W2214582355","doi":"https://doi.org/10.1109/bigdata.2015.7363879","mag":"2214582355"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2015.7363879","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7363879","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077747661","display_name":"Vinay Deolalikar","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Vinay Deolalikar","raw_affiliation_strings":["Groupon, Palo Alto, CA"],"affiliations":[{"raw_affiliation_string":"Groupon, Palo Alto, CA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5077747661"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.13561471,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"52","issue":null,"first_page":"1248","last_page":"1253"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7288938164710999},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.6817877888679504},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.6073058843612671},{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.5751227140426636},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5605127215385437},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5239229798316956},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.49551165103912354},{"id":"https://openalex.org/keywords/master-data","display_name":"Master data","score":0.48730552196502686},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.42431578040122986},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.41560399532318115},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.22834429144859314},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.12912359833717346}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7288938164710999},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.6817877888679504},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6073058843612671},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.5751227140426636},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5605127215385437},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5239229798316956},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.49551165103912354},{"id":"https://openalex.org/C61871575","wikidata":"https://www.wikidata.org/wiki/Q384093","display_name":"Master data","level":2,"score":0.48730552196502686},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.42431578040122986},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.41560399532318115},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.22834429144859314},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.12912359833717346},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2015.7363879","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7363879","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5299999713897705,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1532325895","https://openalex.org/W1546703457","https://openalex.org/W1651093245","https://openalex.org/W1968613377","https://openalex.org/W2154353617","https://openalex.org/W2166559705","https://openalex.org/W2998574808","https://openalex.org/W4213009331","https://openalex.org/W6632910029","https://openalex.org/W6637231022"],"related_works":["https://openalex.org/W2029182001","https://openalex.org/W1560447936","https://openalex.org/W4388429292","https://openalex.org/W4248418894","https://openalex.org/W2594011304","https://openalex.org/W2888855080","https://openalex.org/W3201731148","https://openalex.org/W2748020237","https://openalex.org/W2040005010","https://openalex.org/W2152261054"],"abstract_inverted_index":{"Unstructured":[0],"textual":[1],"data":[2,31,40,58,71,111,138,159],"has":[3],"grown":[4],"rapidly":[5],"in":[6,11,92,105,125,146],"the":[7,48,61,75,107,115,127,142,168,195,217],"past":[8],"two":[9,166],"decades":[10],"various":[12],"domains":[13],"like":[14],"enterprises,":[15],"web,":[16],"scientific,":[17],"etc.":[18],"A":[19],"question":[20,52],"that":[21,73,87,101,140,183],"arises":[22],"naturally":[23],"when":[24],"there":[25],"is":[26,35,59,88,112,182],"such":[27,123],"a":[28,36,68,82,135,184],"surfeit":[29],"of":[30,39,77,85,109,144,167,186,197,206],"is:":[32],"how":[33,55],"valuable":[34,56,129,193],"certain":[37],"piece":[38],"as":[41],"compared":[42],"to":[43,50,60],"another?":[44],"In":[45,63],"an":[46],"enterprise,":[47],"answer":[49],"this":[51,64],"would":[53],"determine":[54],"said":[57],"enterprise.":[62],"paper,":[65],"we":[66,99,133],"build":[67,134],"framework":[69,136,149],"using":[70,137],"mining":[72,139],"quantifies":[74],"value":[76,108],"data.":[78,147],"We":[79,161],"first":[80],"identify":[81],"specific":[83],"notion":[84],"\"value\"":[86],"motivated":[89],"by":[90,114,210],"applications":[91,104],"Enterprise":[93],"unstructured":[94,110],"Information":[95],"Management":[96],"(EIM).":[97],"Namely,":[98],"posit":[100],"for":[102,158],"several":[103],"EIM,":[106],"determined":[113],"associations":[116,124,145,200],"it":[117,130],"captures":[118],"between":[119,201],"concepts.":[120],"The":[121],"more":[122,128,192,199],"data,":[126],"is.":[131],"Next,":[132],"\"counts\"":[141],"number":[143],"Our":[148,179,212],"uses":[150],"clustering":[151],"and":[152,176],"frequent":[153],"itemsets.":[154],"It":[155],"also":[156],"normalizes":[157],"size.":[160],"demonstrate":[162],"our":[163],"approach":[164,214],"on":[165],"most":[169],"widely":[170],"used":[171],"text":[172],"benchmark":[173],"datasets:":[174],"Reuters":[175],"20":[177],"Newsgroups.":[178],"general":[180],"intuition":[181],"corpus":[185],"professionally":[187],"written":[188,209],"news":[189],"articles":[190],"are":[191],"(in":[194],"sense":[196],"capturing":[198],"concepts)":[202],"than":[203],"newsgroup":[204],"postings":[205],"variable":[207],"quality":[208],"non-experts.":[211],"quantitative":[213],"indeed":[215],"reaches":[216],"same":[218],"inference.":[219]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
