{"id":"https://openalex.org/W4281731903","doi":"https://doi.org/10.1145/3514221.3520156","title":"Simplifying Access to Large-scale Structured Datasets by Meta-Profiling with Scalable Training Set Enrichment","display_name":"Simplifying Access to Large-scale Structured Datasets by Meta-Profiling with Scalable Training Set Enrichment","publication_year":2022,"publication_date":"2022-06-10","ids":{"openalex":"https://openalex.org/W4281731903","doi":"https://doi.org/10.1145/3514221.3520156"},"language":"en","primary_location":{"id":"doi:10.1145/3514221.3520156","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3514221.3520156","pdf_url":null,"source":{"id":"https://openalex.org/S4363608845","display_name":"Proceedings of the 2022 International Conference on Management of Data","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001552583","display_name":"Sophie Pavia","orcid":"https://orcid.org/0000-0002-9015-8906"},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sophie Pavia","raw_affiliation_strings":["Florida State University, Tallahasse, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida State University, Tallahasse, FL, USA","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071549682","display_name":"Rituparna Khan","orcid":null},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rituparna Khan","raw_affiliation_strings":["Florida State University, Tallahasse, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida State University, Tallahasse, FL, USA","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005591013","display_name":"Anna Pyayt","orcid":"https://orcid.org/0000-0001-8432-6179"},"institutions":[{"id":"https://openalex.org/I2613432","display_name":"University of South Florida","ror":"https://ror.org/032db5x82","country_code":"US","type":"education","lineage":["https://openalex.org/I2613432"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anna Pyayt","raw_affiliation_strings":["University of South Florida, Tallahasse, FL, USA"],"affiliations":[{"raw_affiliation_string":"University of South Florida, Tallahasse, FL, USA","institution_ids":["https://openalex.org/I2613432"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047938200","display_name":"Michael Gubanov","orcid":"https://orcid.org/0000-0002-1354-1215"},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Gubanov","raw_affiliation_strings":["Florida State University, Tallahasse, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida State University, Tallahasse, FL, USA","institution_ids":["https://openalex.org/I103163165"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5001552583"],"corresponding_institution_ids":["https://openalex.org/I103163165"],"apc_list":null,"apc_paid":null,"fwci":0.5147,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.52972683,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"2377","last_page":"2380"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7925276160240173},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.7539272904396057},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7273945212364197},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5119184255599976},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4857681095600128},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4667793810367584},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3995524048805237},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3973771631717682},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3899434804916382},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2799467444419861},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19843357801437378},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07384377717971802}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7925276160240173},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.7539272904396057},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7273945212364197},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5119184255599976},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4857681095600128},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4667793810367584},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3995524048805237},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3973771631717682},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3899434804916382},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2799467444419861},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19843357801437378},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07384377717971802},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3514221.3520156","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3514221.3520156","pdf_url":null,"source":{"id":"https://openalex.org/S4363608845","display_name":"Proceedings of the 2022 International Conference on Management of Data","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W102708294","https://openalex.org/W1679882079","https://openalex.org/W1973656812","https://openalex.org/W2001130020","https://openalex.org/W2008896880","https://openalex.org/W2024796520","https://openalex.org/W2049708798","https://openalex.org/W2092486351","https://openalex.org/W2108223890","https://openalex.org/W2111869785","https://openalex.org/W2125822162","https://openalex.org/W2171308734","https://openalex.org/W2340354588","https://openalex.org/W2341748398","https://openalex.org/W2604190938","https://openalex.org/W2604741280","https://openalex.org/W2898796029","https://openalex.org/W2914901005","https://openalex.org/W3045211065","https://openalex.org/W3094067549","https://openalex.org/W4205778639","https://openalex.org/W6628708910","https://openalex.org/W6638231387","https://openalex.org/W6713731013","https://openalex.org/W6738456082","https://openalex.org/W6748634344","https://openalex.org/W6762433658","https://openalex.org/W6778608065"],"related_works":["https://openalex.org/W2389214306","https://openalex.org/W2965083567","https://openalex.org/W4235240664","https://openalex.org/W2161444195","https://openalex.org/W1838576100","https://openalex.org/W2757182831","https://openalex.org/W2095886385","https://openalex.org/W2089704382","https://openalex.org/W1983399550","https://openalex.org/W3151146928"],"abstract_inverted_index":{"Accessing":[0],"large-scale":[1],"structured":[2],"datasets":[3],"such":[4],"as":[5],"WDC":[6],"[21],":[7],"having":[8],"millions":[9],"of":[10,15,17,35,44],"tables":[11,38],"coming":[12],"from":[13],"hundreds":[14,43],"thousands":[16],"sources":[18,41],"is":[19,34],"very":[20],"challenging":[21],"[11,":[22],"13,":[23],"14,":[24],"30,":[25],"31].":[26],"Even":[27],"if":[28],"one":[29],"topic":[30],"(e.g.":[31],"Job":[32],"postings)":[33],"interest,":[36],"Jobs":[37],"in":[39],"different":[40,45],"have":[42],"schemas,":[46],"which":[47],"significantly":[48],"complicates":[49],"both":[50],"finding":[51],"and":[52],"querying":[53],"them.":[54]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
