{"id":"https://openalex.org/W2123878016","doi":"https://doi.org/10.1145/1989323.1989336","title":"Automatic discovery of attributes in relational databases","display_name":"Automatic discovery of attributes in relational databases","publication_year":2011,"publication_date":"2011-06-12","ids":{"openalex":"https://openalex.org/W2123878016","doi":"https://doi.org/10.1145/1989323.1989336","mag":"2123878016"},"language":"en","primary_location":{"id":"doi:10.1145/1989323.1989336","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1989323.1989336","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2011 ACM SIGMOD International Conference on Management of data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032263010","display_name":"Meihui Zhang","orcid":"https://orcid.org/0000-0002-0752-9877"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Meihui Zhang","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058831095","display_name":"Marios Hadjieleftheriou","orcid":null},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marios Hadjieleftheriou","raw_affiliation_strings":["AT&amp;T Labs - Research, Florham Park, NJ, USA","AT&T Labs---Research, Florham Park, NJ, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs - Research, Florham Park, NJ, USA","institution_ids":["https://openalex.org/I1283103587"]},{"raw_affiliation_string":"AT&T Labs---Research, Florham Park, NJ, USA#TAB#","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024892041","display_name":"Beng Chin Ooi","orcid":"https://orcid.org/0000-0003-4446-1100"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Beng Chin Ooi","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071763795","display_name":"Cecilia M. Procopiuc","orcid":null},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cecilia M. Procopiuc","raw_affiliation_strings":["AT&amp;T Labs - Research, Florham Park, NJ, USA","AT&T Labs---Research, Florham Park, NJ, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs - Research, Florham Park, NJ, USA","institution_ids":["https://openalex.org/I1283103587"]},{"raw_affiliation_string":"AT&T Labs---Research, Florham Park, NJ, USA#TAB#","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088315797","display_name":"Divesh Srivastava","orcid":"https://orcid.org/0000-0002-7609-9217"},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Divesh Srivastava","raw_affiliation_strings":["AT&amp;T Labs - Research, Florham Park, NJ, USA","AT&T Labs---Research, Florham Park, NJ, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs - Research, Florham Park, NJ, USA","institution_ids":["https://openalex.org/I1283103587"]},{"raw_affiliation_string":"AT&T Labs---Research, Florham Park, NJ, USA#TAB#","institution_ids":["https://openalex.org/I1283103587"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5032263010"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":4.2336,"has_fulltext":false,"cited_by_count":51,"citation_normalized_percentile":{"value":0.94032464,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"109","last_page":"120"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7711402177810669},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.6810774803161621},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.5731297135353088},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.48073816299438477}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7711402177810669},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.6810774803161621},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5731297135353088},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.48073816299438477}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1989323.1989336","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1989323.1989336","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2011 ACM SIGMOD International Conference on Management of data","raw_type":"proceedings-article"},{"id":"pmh:oai:scholarbank.nus.edu.sg:10635/42024","is_oa":false,"landing_page_url":"http://scholarbank.nus.edu.sg/handle/10635/42024","pdf_url":null,"source":{"id":"https://openalex.org/S7407052290","display_name":"National University of Singapore","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Scopus","raw_type":"Conference Paper"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.221.2983","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.221.2983","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://research.att.com/%7Emarioh/papers/sigmod11.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.46000000834465027,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W72943092","https://openalex.org/W1502916507","https://openalex.org/W2008896880","https://openalex.org/W2042389627","https://openalex.org/W2067308467","https://openalex.org/W2081193615","https://openalex.org/W2091858563","https://openalex.org/W2110686900","https://openalex.org/W2112129552","https://openalex.org/W2119309048","https://openalex.org/W2121516976","https://openalex.org/W2123845384","https://openalex.org/W2139135093","https://openalex.org/W2142104809","https://openalex.org/W2157060173","https://openalex.org/W2914959486","https://openalex.org/W6629956336","https://openalex.org/W6680506379"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2130043461","https://openalex.org/W2530322880","https://openalex.org/W1596801655"],"abstract_inverted_index":{"In":[0],"this":[1],"work":[2],"we":[3,130,182,203],"design":[4,97],"algorithms":[5],"for":[6,13,116,121],"clustering":[7],"relational":[8,57],"columns":[9,18,39,92,175,193],"into":[10,186,194],"attributes,":[11],"i.e.,":[12],"identifying":[14,33],"strong":[15,154],"relationships":[16,74,90,111,155],"between":[17,75,91,156],"based":[19],"on":[20],"the":[21,27,82,127,142,157,165,184,198],"common":[22],"properties":[23],"and":[24,67,119,124,176,189,212],"characteristics":[25],"of":[26,38,50,54,84,144,159,162,192,200],"values":[28,158],"they":[29,100],"contain.":[30],"For":[31],"example,":[32],"whether":[34],"a":[35,105,160,168,206],"certain":[36,89],"set":[37,161],"refers":[40],"to":[41,72,152,173,179],"telephone":[42],"numbers":[43],"versus":[44,52],"social":[45],"security":[46],"numbers,":[47],"or":[48,99],"names":[49,53],"customers":[51],"nations.":[55],"Traditional":[56],"database":[58,107,166,174],"schema":[59,79,117],"languages":[60,80],"use":[61,149],"very":[62],"limited":[63],"primitive":[64],"data":[65,86,132],"types":[66],"simple":[68],"foreign":[69],"key":[70],"constraints":[71],"express":[73],"columns.":[76,163],"Object":[77],"oriented":[78,133],"allow":[81],"definition":[83],"custom":[85],"types;":[87],"still,":[88],"might":[93,101],"be":[94],"unknown":[95],"at":[96],"time":[98],"appear":[102],"only":[103],"in":[104],"particular":[106],"instance.":[108],"Nevertheless,":[109],"these":[110],"are":[112],"an":[113],"invaluable":[114],"tool":[115],"matching,":[118],"generally":[120],"better":[122],"understanding":[123],"working":[125],"with":[126],"data.":[128],"Here,":[129],"introduce":[131],"solutions":[134,139],"(we":[135],"do":[136],"not":[137],"consider":[138],"that":[140,148],"assume":[141],"existence":[143],"any":[145],"external":[146],"knowledge)":[147],"statistical":[150],"measures":[151],"identify":[153],"Interpreting":[164],"as":[167],"graph":[169,185],"where":[170],"nodes":[171],"correspond":[172,178],"edges":[177],"column":[180],"relationships,":[181],"decompose":[183],"connected":[187],"components":[188],"cluster":[190],"sets":[191],"attributes.":[195],"To":[196],"test":[197],"quality":[199],"our":[201],"solution,":[202],"also":[204],"provide":[205],"comprehensive":[207],"experimental":[208],"evaluation":[209],"using":[210],"real":[211],"synthetic":[213],"datasets.":[214]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
