{"id":"https://openalex.org/W2147025414","doi":"https://doi.org/10.1109/icdew.2008.4498331","title":"Quarrying dataspaces: Schemaless profiling of unfamiliar information sources","display_name":"Quarrying dataspaces: Schemaless profiling of unfamiliar information sources","publication_year":2008,"publication_date":"2008-04-01","ids":{"openalex":"https://openalex.org/W2147025414","doi":"https://doi.org/10.1109/icdew.2008.4498331","mag":"2147025414"},"language":"en","primary_location":{"id":"doi:10.1109/icdew.2008.4498331","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdew.2008.4498331","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE 24th International Conference on Data Engineering Workshop","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007124763","display_name":"Bill Howe","orcid":"https://orcid.org/0000-0001-8588-8472"},"institutions":[{"id":"https://openalex.org/I126345244","display_name":"Portland State University","ror":"https://ror.org/00yn2fy02","country_code":"US","type":"education","lineage":["https://openalex.org/I126345244"]},{"id":"https://openalex.org/I165690674","display_name":"Oregon Health & Science University","ror":"https://ror.org/009avj582","country_code":"US","type":"education","lineage":["https://openalex.org/I165690674"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bill Howe","raw_affiliation_strings":["Department of Computer Science, Portland State University, Portland, OR, USA","Center for Coastal Margin Obs. & Prediction, Oregon Health & Sci. Univ., Beaverton, OR"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Portland State University, Portland, OR, USA","institution_ids":["https://openalex.org/I126345244"]},{"raw_affiliation_string":"Center for Coastal Margin Obs. & Prediction, Oregon Health & Sci. Univ., Beaverton, OR","institution_ids":["https://openalex.org/I165690674"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014301644","display_name":"David Maier","orcid":"https://orcid.org/0000-0003-4790-5619"},"institutions":[{"id":"https://openalex.org/I126345244","display_name":"Portland State University","ror":"https://ror.org/00yn2fy02","country_code":"US","type":"education","lineage":["https://openalex.org/I126345244"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Maier","raw_affiliation_strings":["Department of Computer Science, Portland State University, Portland, OR, USA","Portland State University, Department of Computer Science, 1900 SW 4th Avenue, Oregon, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Portland State University, Portland, OR, USA","institution_ids":["https://openalex.org/I126345244"]},{"raw_affiliation_string":"Portland State University, Department of Computer Science, 1900 SW 4th Avenue, Oregon, USA#TAB#","institution_ids":["https://openalex.org/I126345244"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108402351","display_name":"Nicolas Rayner","orcid":null},"institutions":[{"id":"https://openalex.org/I126345244","display_name":"Portland State University","ror":"https://ror.org/00yn2fy02","country_code":"US","type":"education","lineage":["https://openalex.org/I126345244"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nicolas Rayner","raw_affiliation_strings":["Department of Computer Science, Portland State University, Portland, OR, USA","Portland State University, Department of Computer Science, 1900 SW 4th Avenue, Oregon, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Portland State University, Portland, OR, USA","institution_ids":["https://openalex.org/I126345244"]},{"raw_affiliation_string":"Portland State University, Department of Computer Science, 1900 SW 4th Avenue, Oregon, USA#TAB#","institution_ids":["https://openalex.org/I126345244"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042444567","display_name":"James Rucker","orcid":"https://orcid.org/0000-0003-4647-8088"},"institutions":[{"id":"https://openalex.org/I126345244","display_name":"Portland State University","ror":"https://ror.org/00yn2fy02","country_code":"US","type":"education","lineage":["https://openalex.org/I126345244"]},{"id":"https://openalex.org/I165690674","display_name":"Oregon Health & Science University","ror":"https://ror.org/009avj582","country_code":"US","type":"education","lineage":["https://openalex.org/I165690674"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Rucker","raw_affiliation_strings":["Center for Coastal Margin Observation and Prediction, Oregon Health and Sciences University, Beaverton, OR, USA","Portland State University, Department of Computer Science, 1900 SW 4th Avenue, Oregon, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Center for Coastal Margin Observation and Prediction, Oregon Health and Sciences University, Beaverton, OR, USA","institution_ids":["https://openalex.org/I165690674"]},{"raw_affiliation_string":"Portland State University, Department of Computer Science, 1900 SW 4th Avenue, Oregon, USA#TAB#","institution_ids":["https://openalex.org/I126345244"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5007124763"],"corresponding_institution_ids":["https://openalex.org/I126345244","https://openalex.org/I165690674"],"apc_list":null,"apc_paid":null,"fwci":2.9687,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.91248513,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"3226","issue":null,"first_page":"270","last_page":"277"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7510421872138977},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.7476378679275513},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.6647877097129822},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.579391360282898},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5249559879302979},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.40440431237220764},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2720324993133545},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1466892659664154}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7510421872138977},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.7476378679275513},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.6647877097129822},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.579391360282898},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5249559879302979},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40440431237220764},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2720324993133545},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1466892659664154}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icdew.2008.4498331","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdew.2008.4498331","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE 24th International Conference on Data Engineering Workshop","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","score":0.4699999988079071,"display_name":"Life in Land"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309493","display_name":"Portland State University","ror":"https://ror.org/00yn2fy02"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W73222560","https://openalex.org/W141307743","https://openalex.org/W157967172","https://openalex.org/W343945789","https://openalex.org/W1504375824","https://openalex.org/W1565595968","https://openalex.org/W2029554959","https://openalex.org/W2034626816","https://openalex.org/W2059268928","https://openalex.org/W2095733021","https://openalex.org/W2101491706","https://openalex.org/W2161291780","https://openalex.org/W2171244245","https://openalex.org/W2295108729","https://openalex.org/W6605701462","https://openalex.org/W6606400306","https://openalex.org/W6674352496","https://openalex.org/W6684993414"],"related_works":["https://openalex.org/W2392768766","https://openalex.org/W2058118494","https://openalex.org/W2501188010","https://openalex.org/W2095118173","https://openalex.org/W2382021449","https://openalex.org/W848359858","https://openalex.org/W2106424170","https://openalex.org/W4299935056","https://openalex.org/W2768810474","https://openalex.org/W2134629545"],"abstract_inverted_index":{"Traditional":[0],"data":[1,106,145],"integration":[2],"and":[3,15,61,74,95,98,115,151,188,196,199],"analysis":[4,57],"approaches":[5],"tend":[6],"to":[7,133,173],"assume":[8],"intimate":[9],"familiarity":[10],"with":[11,87,147],"the":[12,18,42,59,72,77,88,124,158,190],"structure,":[13],"semantics,":[14],"capabilities":[16],"of":[17,58,71,76,85,90,100,144],"available":[19],"information":[20,66,78],"sources":[21,146],"before":[22],"applicable":[23],"tools":[24],"can":[25],"be":[26],"used":[27,160],"effectively.":[28],"This":[29],"assumption":[30],"often":[31],"does":[32],"not":[33],"hold":[34],"in":[35,49,102,161,166,202],"practice.":[36],"We":[37,121],"introduce":[38],"dataspace":[39,92,113,119,135],"profiling":[40,54,114,136],"as":[41,80],"cardinal":[43],"activity":[44],"when":[45],"beginning":[46],"a":[47,81,91,129,142,153,177,185],"project":[48],"an":[50,56,65,193],"unfamiliar":[51],"dataspace.":[52],"Dataspace":[53],"is":[55],"structures":[60],"properties":[62],"exposed":[63],"by":[64],"source,":[67],"allowing":[68],"1)":[69,176],"assessment":[70,84],"utility":[73],"importance":[75],"source":[79],"whole,":[82],"2)":[83,189],"compatibility":[86],"services":[89],"support":[93,134],"platform,":[94],"3)":[96],"determination":[97],"externalization":[99],"structure":[101],"preparation":[103],"for":[104,118,180,192],"specific":[105],"applications.":[107,168],"In":[108],"this":[109],"paper,":[110],"we":[111,170],"define":[112],"articulate":[116],"requirements":[117],"profilers.":[120],"then":[122],"describe":[123],"Quarry":[125,162,172,205],"system,":[126,198],"which":[127],"offers":[128,206],"generic":[130,186],"browse-and-query":[131],"interface":[132],"activities,":[137],"including":[138],"path":[139],"profiling,":[140],"over":[141,208],"variety":[143],"minimal":[148,152],"setup":[149],"costs":[150],"priori":[154],"assumptions.We":[155],"show":[156],"that":[157,201],"mechanisms":[159],"deliver":[163],"strong":[164],"performance":[165],"large-scale":[167],"Specifically,":[169],"use":[171],"efficiently":[174],"profile":[175],"detailed":[178],"standard":[179],"medication":[181],"nomenclature":[182],"supplied":[183],"under":[184],"schema":[187],"metadata":[191],"environmental":[194],"observation":[195],"forecasting":[197],"conclude":[200],"these":[203],"contexts":[204],"advantages":[207],"existing":[209],"tools.":[210]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
