{"id":"https://openalex.org/W1982930205","doi":"https://doi.org/10.1145/2389686.2389688","title":"When big data leads to lost data","display_name":"When big data leads to lost data","publication_year":2012,"publication_date":"2012-11-02","ids":{"openalex":"https://openalex.org/W1982930205","doi":"https://doi.org/10.1145/2389686.2389688","mag":"1982930205"},"language":"en","primary_location":{"id":"doi:10.1145/2389686.2389688","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2389686.2389688","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Ph.D. workshop on Information and knowledge","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083399212","display_name":"V. M. Megler","orcid":null},"institutions":[{"id":"https://openalex.org/I126345244","display_name":"Portland State University","ror":"https://ror.org/00yn2fy02","country_code":"US","type":"education","lineage":["https://openalex.org/I126345244"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"V. M. Megler","raw_affiliation_strings":["Portland State University, Portland, OR, USA","(Portland State University, Portland, OR, USA"],"affiliations":[{"raw_affiliation_string":"Portland State University, Portland, OR, USA","institution_ids":["https://openalex.org/I126345244"]},{"raw_affiliation_string":"(Portland State University, Portland, OR, USA","institution_ids":["https://openalex.org/I126345244"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014301644","display_name":"David Maier","orcid":"https://orcid.org/0000-0003-4790-5619"},"institutions":[{"id":"https://openalex.org/I126345244","display_name":"Portland State University","ror":"https://ror.org/00yn2fy02","country_code":"US","type":"education","lineage":["https://openalex.org/I126345244"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Maier","raw_affiliation_strings":["Portland State University, Portland, OR, USA","(Portland State University, Portland, OR, USA"],"affiliations":[{"raw_affiliation_string":"Portland State University, Portland, OR, USA","institution_ids":["https://openalex.org/I126345244"]},{"raw_affiliation_string":"(Portland State University, Portland, OR, USA","institution_ids":["https://openalex.org/I126345244"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5083399212"],"corresponding_institution_ids":["https://openalex.org/I126345244"],"apc_list":null,"apc_paid":null,"fwci":1.9588,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.88666752,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10757","display_name":"Geographic Information Systems Studies","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/3305","display_name":"Geography, Planning and Development"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6171917915344238},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.5692426562309265},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3214256763458252},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.1978960931301117}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6171917915344238},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.5692426562309265},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3214256763458252},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.1978960931301117}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2389686.2389688","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2389686.2389688","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th Ph.D. workshop on Information and knowledge","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Life below water","id":"https://metadata.un.org/sdg/14","score":0.8600000143051147}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W119481552","https://openalex.org/W174803244","https://openalex.org/W1489817976","https://openalex.org/W1489823821","https://openalex.org/W1532325895","https://openalex.org/W1573605934","https://openalex.org/W1592033656","https://openalex.org/W1602022764","https://openalex.org/W1602667807","https://openalex.org/W1606412149","https://openalex.org/W1633682760","https://openalex.org/W1659561774","https://openalex.org/W1683392482","https://openalex.org/W1687157824","https://openalex.org/W2008251177","https://openalex.org/W2009688537","https://openalex.org/W2020572986","https://openalex.org/W2027382829","https://openalex.org/W2052825015","https://openalex.org/W2084863702","https://openalex.org/W2089192828","https://openalex.org/W2092364718","https://openalex.org/W2095849344","https://openalex.org/W2116504754","https://openalex.org/W2118516929","https://openalex.org/W2124318441","https://openalex.org/W2136009376","https://openalex.org/W2144790020","https://openalex.org/W2154724067","https://openalex.org/W2155822833","https://openalex.org/W2165723447","https://openalex.org/W2482128004","https://openalex.org/W2912067980","https://openalex.org/W2989499211","https://openalex.org/W3206971523","https://openalex.org/W4213009331"],"related_works":["https://openalex.org/W4322629366","https://openalex.org/W2808989540","https://openalex.org/W2617449561","https://openalex.org/W2397053934","https://openalex.org/W1039292361","https://openalex.org/W2767632110","https://openalex.org/W2551093110","https://openalex.org/W2148016376","https://openalex.org/W3184179822","https://openalex.org/W1996408511"],"abstract_inverted_index":{"For":[0],"decades,":[1],"scientists":[2,29,39,61],"bemoaned":[3],"the":[4,31,63,75,124,158,167,182,206],"scarcity":[5],"of":[6,49,56,126,139,150,201],"observational":[7],"data":[8,21,50,87],"to":[9,14,33,67,90,144,166,180,191],"analyze":[10],"and":[11,141,188],"against":[12],"which":[13],"test":[15],"their":[16,34,68],"models.":[17],"Exponential":[18],"growth":[19,196],"in":[20,54,109],"volumes":[22],"from":[23,147],"ever-cheaper":[24],"environmental":[25],"sensors":[26],"has":[27],"provided":[28],"with":[30,44],"answer":[32],"prayers:":[35],"\"big":[36],"data\".":[37],"Now,":[38],"face":[40],"a":[41,110,137],"new":[42,111],"challenge:":[43],"terabytes,":[45],"petabytes":[46],"or":[47],"exabytes":[48],"at":[51,176],"hand,":[52],"stored":[53],"thousands":[55],"heterogeneous":[57],"datasets,":[58],"how":[59,193],"can":[60],"find":[62,74],"datasets":[64],"most":[65],"relevant":[66],"research":[69,93,190],"interests?":[70],"If":[71],"they":[72,78],"cannot":[73],"data,":[76],"then":[77,154],"may":[79],"as":[80,102],"well":[81],"never":[82],"have":[83],"collected":[84],"it;":[85],"that":[86,135],"is":[88],"lost":[89],"them.":[91],"Our":[92],"addresses":[94],"this":[95,107],"challenge,":[96],"using":[97],"an":[98,133,173,177],"existing":[99],"scientific":[100,129,151],"archive":[101,195],"our":[103,199],"test-bed.":[104],"We":[105,131,153,170,185],"approach":[106,134],"problem":[108],"way:":[112],"by":[113,164],"adapting":[114],"Information":[115],"Retrieval":[116],"techniques,":[117],"developed":[118],"for":[119],"searching":[120],"text":[121],"documents,":[122],"into":[123],"world":[125],"(primarily":[127],"numeric)":[128],"data.":[130,152],"propose":[132,186],"uses":[136],"blend":[138],"automated":[140],"\"semi-curated\"":[142],"methods":[143],"extract":[145],"metadata":[146],"large":[148],"archives":[149],"perform":[155],"searches":[156],"over":[157],"extracted":[159],"metadata,":[160],"returning":[161],"results":[162],"ranked":[163],"similarity":[165],"query":[168],"terms.":[169],"briefly":[171],"describe":[172],"implementation":[174],"performed":[175],"ocean":[178],"observatory":[179],"validate":[181],"proposed":[183],"approach.":[184],"performance":[187],"scalability":[189],"explore":[192],"continued":[194],"will":[197],"affect":[198],"goal":[200],"interactive":[202],"response,":[203],"no":[204],"matter":[205],"scale.":[207]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
