{"id":"https://openalex.org/W2125510575","doi":"https://doi.org/10.14778/1687627.1687699","title":"Summarizing relational databases","display_name":"Summarizing relational databases","publication_year":2009,"publication_date":"2009-08-01","ids":{"openalex":"https://openalex.org/W2125510575","doi":"https://doi.org/10.14778/1687627.1687699","mag":"2125510575"},"language":"en","primary_location":{"id":"doi:10.14778/1687627.1687699","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1687627.1687699","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101767360","display_name":"Xiaoyan Yang","orcid":"https://orcid.org/0000-0001-8546-3589"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Xiaoyan Yang","raw_affiliation_strings":["National Univ. of Singapore, Republic of Singapore","National University of Singapore, Republic of Singapore"],"affiliations":[{"raw_affiliation_string":"National Univ. of Singapore, Republic of Singapore","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"National University of Singapore, Republic of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071763795","display_name":"Cecilia M. Procopiuc","orcid":null},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cecilia M. Procopiuc","raw_affiliation_strings":["AT&amp;T Labs--Research, Florham Park, NJ"],"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs--Research, Florham Park, NJ","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088315797","display_name":"Divesh Srivastava","orcid":"https://orcid.org/0000-0002-7609-9217"},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Divesh Srivastava","raw_affiliation_strings":["AT&amp;T Labs--Research, Florham Park, NJ"],"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs--Research, Florham Park, NJ","institution_ids":["https://openalex.org/I1283103587"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101767360"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":14.5806,"has_fulltext":false,"cited_by_count":89,"citation_normalized_percentile":{"value":0.99160419,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"2","issue":"1","first_page":"634","last_page":"645"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7421911358833313},{"id":"https://openalex.org/keywords/view","display_name":"View","score":0.7242084741592407},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.6883070468902588},{"id":"https://openalex.org/keywords/database-schema","display_name":"Database schema","score":0.6506582498550415},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5516942739486694},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.50522381067276},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.5012965202331543},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.4901600778102875},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.48119646310806274},{"id":"https://openalex.org/keywords/database-design","display_name":"Database design","score":0.4559198021888733},{"id":"https://openalex.org/keywords/information-schema","display_name":"Information schema","score":0.4514533281326294},{"id":"https://openalex.org/keywords/database-model","display_name":"Database model","score":0.4334743618965149},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.374703586101532},{"id":"https://openalex.org/keywords/semi-structured-model","display_name":"Semi-structured model","score":0.15775737166404724}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7421911358833313},{"id":"https://openalex.org/C54239708","wikidata":"https://www.wikidata.org/wiki/Q1329910","display_name":"View","level":3,"score":0.7242084741592407},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.6883070468902588},{"id":"https://openalex.org/C30775581","wikidata":"https://www.wikidata.org/wiki/Q632285","display_name":"Database schema","level":3,"score":0.6506582498550415},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5516942739486694},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.50522381067276},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.5012965202331543},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.4901600778102875},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.48119646310806274},{"id":"https://openalex.org/C148840519","wikidata":"https://www.wikidata.org/wiki/Q1049878","display_name":"Database design","level":2,"score":0.4559198021888733},{"id":"https://openalex.org/C150012506","wikidata":"https://www.wikidata.org/wiki/Q6031185","display_name":"Information schema","level":5,"score":0.4514533281326294},{"id":"https://openalex.org/C5968703","wikidata":"https://www.wikidata.org/wiki/Q267136","display_name":"Database model","level":3,"score":0.4334743618965149},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.374703586101532},{"id":"https://openalex.org/C56310702","wikidata":"https://www.wikidata.org/wiki/Q2269281","display_name":"Semi-structured model","level":4,"score":0.15775737166404724},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.14778/1687627.1687699","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1687627.1687699","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},{"id":"pmh:oai:scholarbank.nus.edu.sg:10635/130038","is_oa":false,"landing_page_url":"http://scholarbank.nus.edu.sg/handle/10635/130038","pdf_url":null,"source":{"id":"https://openalex.org/S7407052290","display_name":"National University of Singapore","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Scopus","raw_type":"Article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.151.8328","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.151.8328","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.vldb.org/pvldb/2/vldb09-784.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1524214323","https://openalex.org/W2013089587","https://openalex.org/W2032123413","https://openalex.org/W2048484388","https://openalex.org/W2099111195","https://openalex.org/W2117658379","https://openalex.org/W2126256512","https://openalex.org/W2136530502","https://openalex.org/W2146964183","https://openalex.org/W2294859229","https://openalex.org/W2295428206","https://openalex.org/W6631409916","https://openalex.org/W6697509678"],"related_works":["https://openalex.org/W2188578519","https://openalex.org/W279222092","https://openalex.org/W1502610062","https://openalex.org/W2591891926","https://openalex.org/W1803138684","https://openalex.org/W3085968255","https://openalex.org/W2901432962","https://openalex.org/W2911644013","https://openalex.org/W3191947563","https://openalex.org/W3015670717"],"abstract_inverted_index":{"Complex":[0],"databases":[1,15],"are":[2],"challenging":[3],"to":[4,76,86,178,225],"explore":[5],"and":[6,81,109,172,237,289],"query":[7],"by":[8],"users":[9,30],"unfamiliar":[10],"with":[11,64,206,257,263,292],"their":[12],"schemas.":[13],"Enterprise":[14],"often":[16],"have":[17],"hundreds":[18],"of":[19,36,73,90,105,128,155,163,181,210,282,295],"inter-linked":[20],"tables,":[21,236],"so":[22,94],"even":[23],"when":[24],"extensive":[25,247],"documentation":[26,56],"is":[27,52,57,204,286],"available,":[28],"new":[29],"must":[31],"spend":[32],"a":[33,83,91,96,101,140,164,190,197,216,251],"considerable":[34],"amount":[35],"time":[37],"understanding":[38],"the":[39,48,55,88,103,110,126,132,144,148,153,161,179,185,194,201,230,233,239,279,283,293,296],"schema":[40,145,297],"before":[41],"they":[42],"can":[43,98],"retrieve":[44],"any":[45],"information":[46,106,116,170],"from":[47],"database.":[49,186],"The":[50],"problem":[51],"aggravated":[53],"if":[54],"missing":[58],"or":[59],"outdated,":[60],"which":[61,114],"may":[62],"happen":[63],"legacy":[65],"databases.":[66],"In":[67],"this":[68,78,222],"paper":[69],"we":[70,124,188,214],"identify":[71],"limitations":[72],"previous":[74,258,280],"approaches":[75],"address":[77],"vexing":[79],"problem,":[80],"propose":[82],"principled":[84],"approach":[85,119,256,271],"summarizing":[87],"contents":[89],"relational":[92],"database,":[93,198,253],"that":[95,115,160,175,200,269],"user":[97],"determine":[99],"at":[100],"glance":[102],"type":[104],"it":[107],"contains,":[108],"main":[111],"tables":[112,183,195,228],"in":[113,131,139,184,196,229],"resides.":[117],"Our":[118],"has":[120],"three":[121],"components:":[122],"First,":[123],"define":[125,189],"importance":[127,162],"each":[129],"table":[130,156,165,211],"database":[133,231],"as":[134,241,260,262],"its":[135,169],"stable":[136],"state":[137,281],"value":[138],"random":[141],"walk":[142],"over":[143,193],"graph,":[146],"where":[147],"transition":[149],"probabilities":[150],"depend":[151],"on":[152,168,173,250],"entropies":[154],"attributes.":[157],"This":[158],"ensures":[159],"depends":[166],"both":[167],"content,":[171],"how":[174],"content":[176,180],"relates":[177],"other":[182],"Second,":[187],"metric":[191],"space":[192],"such":[199],"distance":[202,223],"function":[203,224],"consistent":[205],"an":[207,246],"intuitive":[208],"notion":[209],"similarity.":[212],"Finally,":[213],"use":[215],"Weighted":[217],"k":[218],"-Center":[219],"algorithm":[220],"under":[221],"cluster":[226],"all":[227],"around":[232],"most":[234],"relevant":[235],"return":[238],"result":[240],"our":[242,255,270],"summary.":[243],"We":[244,267],"conduct":[245],"experimental":[248],"study":[249],"benchmark":[252],"comparing":[254],"methods,":[259],"well":[261],"several":[264],"hybrid":[265],"models.":[266],"show":[268],"not":[272],"only":[273],"achieves":[274],"significantly":[275],"higher":[276],"accuracy":[277],"than":[278],"art,":[284],"but":[285],"also":[287],"faster":[288],"scales":[290],"linearly":[291],"size":[294],"graph.":[298]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":10},{"year":2012,"cited_by_count":18}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
