{"id":"https://openalex.org/W2119309048","doi":"https://doi.org/10.1145/1516360.1516372","title":"Type-based categorization of relational attributes","display_name":"Type-based categorization of relational attributes","publication_year":2009,"publication_date":"2009-03-24","ids":{"openalex":"https://openalex.org/W2119309048","doi":"https://doi.org/10.1145/1516360.1516372","mag":"2119309048"},"language":"en","primary_location":{"id":"doi:10.1145/1516360.1516372","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1516360.1516372","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1516360.1516372","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th International Conference on Extending Database Technology: Advances in Database Technology","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/1516360.1516372","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021015017","display_name":"Babak Ahmadi","orcid":"https://orcid.org/0000-0001-8635-5256"},"institutions":[{"id":"https://openalex.org/I4210144576","display_name":"Fraunhofer Institute for Intelligent Analysis and Information Systems","ror":"https://ror.org/04nc32781","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210144576","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Babak Ahmadi","raw_affiliation_strings":["Fraunhofer IAIS"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fraunhofer IAIS","institution_ids":["https://openalex.org/I4210144576"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058831095","display_name":"Marios Hadjieleftheriou","orcid":null},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marios Hadjieleftheriou","raw_affiliation_strings":["AT&amp;T Labs Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs Research","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003335849","display_name":"Thomas Seidl","orcid":"https://orcid.org/0000-0002-4861-1412"},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Thomas Seidl","raw_affiliation_strings":["RWTH Aachen University","[Rwth Aachen University]"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"RWTH Aachen University","institution_ids":["https://openalex.org/I887968799"]},{"raw_affiliation_string":"[Rwth Aachen University]","institution_ids":["https://openalex.org/I887968799"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088315797","display_name":"Divesh Srivastava","orcid":"https://orcid.org/0000-0002-7609-9217"},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Divesh Srivastava","raw_affiliation_strings":["AT&amp;T Labs Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs Research","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061790878","display_name":"Suresh Venkatasubramanian","orcid":"https://orcid.org/0000-0001-7679-7130"},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Suresh Venkatasubramanian","raw_affiliation_strings":["University of Utah","University of Utah,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Utah","institution_ids":["https://openalex.org/I223532165"]},{"raw_affiliation_string":"University of Utah,","institution_ids":["https://openalex.org/I223532165"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2494,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.82725573,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"84","last_page":"95"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7701894044876099},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6638877391815186},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6266892552375793},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.6076095104217529},{"id":"https://openalex.org/keywords/data-type","display_name":"Data type","score":0.4906734228134155},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.4711640775203705},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.46245479583740234},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4484359622001648},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.4365622401237488},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3970666229724884},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3294220566749573},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3197072744369507}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7701894044876099},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6638877391815186},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6266892552375793},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.6076095104217529},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.4906734228134155},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.4711640775203705},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.46245479583740234},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4484359622001648},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.4365622401237488},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3970666229724884},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3294220566749573},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3197072744369507},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":8,"locations":[{"id":"doi:10.1145/1516360.1516372","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1516360.1516372","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1516360.1516372","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th International Conference on Extending Database Technology: Advances in Database Technology","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.150.3492","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.150.3492","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://research.att.com/~marioh/papers/edbt09.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.169.8301","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.169.8301","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www-kd.iai.uni-bonn.de/pubattachments/388/ahmadi-edbt09.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.329.7372","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.329.7372","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.edbt.org/Proceedings/2009-StPetersburg/edbt/papers/p0084-Ahmadi.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.371.3651","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.371.3651","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.utah.edu/~suresh/web/wp-content/uploads/2008/11/paper.pdf","raw_type":"text"},{"id":"pmh:oai:fraunhofer.de:N-188338","is_oa":false,"landing_page_url":"http://publica.fraunhofer.de/documents/N-188338.html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400801","display_name":"Publikationsdatenbank der Fraunhofer-Gesellschaft (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Fraunhofer IAIS","raw_type":"Conference Paper"},{"id":"pmh:oai:publica.fraunhofer.de:publica/365746","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/365746","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference paper"},{"id":"pmh:oai:publications.rwth-aachen.de:125322","is_oa":false,"landing_page_url":"https://publications.rwth-aachen.de/record/125322","pdf_url":null,"source":{"id":"https://openalex.org/S4306401362","display_name":"RWTH Publications (RWTH Aachen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887968799","host_organization_name":"RWTH Aachen University","host_organization_lineage":["https://openalex.org/I887968799"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Advances in Database Technology - EDBT 2009 : 12th International Conference on Extending Database Technology, Saint Petersburg, March 24 - 26, 2009 ; proceedings / eds: Martin Kersten ... Association for Computing Machinery<br/>12. International Conference on Extending Database Technology, EDBT 2009, Saint Petersburg, Russia, 2009-03-24 - 2009-03-26","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1145/1516360.1516372","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1516360.1516372","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1516360.1516372","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th International Conference on Extending Database Technology: Advances in Database Technology","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.5400000214576721}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2119309048.pdf","grobid_xml":"https://content.openalex.org/works/W2119309048.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W1502916507","https://openalex.org/W1534408902","https://openalex.org/W1612155886","https://openalex.org/W1660390307","https://openalex.org/W1781748254","https://openalex.org/W1856955241","https://openalex.org/W1965996575","https://openalex.org/W1984566373","https://openalex.org/W1990368529","https://openalex.org/W2000901932","https://openalex.org/W2036705516","https://openalex.org/W2048779798","https://openalex.org/W2052207834","https://openalex.org/W2060314721","https://openalex.org/W2064580901","https://openalex.org/W2073471108","https://openalex.org/W2113076747","https://openalex.org/W2132069633","https://openalex.org/W2135878030","https://openalex.org/W2138745909","https://openalex.org/W2150698190","https://openalex.org/W2163372266","https://openalex.org/W2180101149","https://openalex.org/W3004104752","https://openalex.org/W3099514962","https://openalex.org/W3142071826","https://openalex.org/W4295063741","https://openalex.org/W6629956336","https://openalex.org/W6637101025","https://openalex.org/W6679663036","https://openalex.org/W6776535907"],"related_works":["https://openalex.org/W2165912799","https://openalex.org/W2735662278","https://openalex.org/W2382615723","https://openalex.org/W4311804456","https://openalex.org/W1987484445","https://openalex.org/W2623658258","https://openalex.org/W2143413548","https://openalex.org/W1969219540","https://openalex.org/W2370459448","https://openalex.org/W2105067402"],"abstract_inverted_index":{"In":[0],"this":[1],"work":[2],"we":[3,23,136,159],"concentrate":[4,104],"on":[5,11,37,105,112,133,194],"categorization":[6],"of":[7,29,41,149,170,182],"relational":[8],"attributes":[9,35,203],"based":[10,36,111,132,193],"their":[12],"data":[13,43,79,125,151,172,185],"type.":[14],"Assuming":[15],"that":[16,81,122,145,199],"attribute":[17,109],"type/characteristics":[18],"are":[19,82],"unknown":[20],"or":[21,95],"unidentifiable,":[22],"analyze":[24],"and":[25,67,99,115,128,175,197],"compare":[26],"a":[27,85,146,166,183],"variety":[28],"type-based":[30,108],"signatures":[31,55,110,131],"for":[32,60,154],"classifying":[33],"the":[34,38,42,143,150,156,171,179],"semantic":[39],"type":[40,186],"contained":[44],"therein":[45],"(e.g.,":[46],"router":[47],"identifiers,":[48],"social":[49],"security":[50],"numbers,":[51],"email":[52],"addresses).":[53],"The":[54],"can":[56],"subsequently":[57],"be":[58],"used":[59],"other":[61],"applications":[62],"as":[63],"well,":[64],"like":[65],"clustering":[66],"index":[68],"optimization/compression.":[69],"This":[70],"application":[71],"is":[72,152,173,187],"useful":[73],"in":[74,84],"cases":[75,164],"where":[76,165],"very":[77,96,138,167],"large":[78,147],"collections":[80],"generated":[83],"distributed,":[86],"ungoverned":[87],"fashion":[88],"end":[89],"up":[90],"having":[91],"unknown,":[92],"incomplete,":[93],"inconsistent":[94],"complex":[97],"schemata":[98],"schema":[100],"level":[101],"meta-data.":[102],"We":[103,119,190],"heuristically":[106],"generating":[107],"both":[113],"local":[114],"global":[116],"computation":[117],"approaches.":[118],"show":[120],"experimentally":[121],"by":[123],"decomposing":[124],"into":[126],"q-grams":[127],"then":[129],"considering":[130],"q-gram":[134,180],"distributions,":[135],"achieve":[137],"good":[139],"classification":[140,206],"accuracy":[141],"under":[142],"assumption":[144],"sample":[148,169],"available":[153],"building":[155],"signatures.":[157],"Then,":[158],"turn":[160],"our":[161],"attention":[162],"to":[163,204],"small":[168],"available,":[174],"hence":[176],"accurately":[177],"capturing":[178],"distribution":[181],"given":[184],"almost":[188],"impossible.":[189],"propose":[191],"techniques":[192],"dimensionality":[195],"reduction":[196],"soft-clustering":[198],"exploit":[200],"correlations":[201],"between":[202],"improve":[205],"accuracy.":[207]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
