{"id":"https://openalex.org/W2116020486","doi":"https://doi.org/10.1145/1978802.1978804","title":"XML data clustering","display_name":"XML data clustering","publication_year":2011,"publication_date":"2011-10-01","ids":{"openalex":"https://openalex.org/W2116020486","doi":"https://doi.org/10.1145/1978802.1978804","mag":"2116020486"},"language":"en","primary_location":{"id":"doi:10.1145/1978802.1978804","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1978802.1978804","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"type":"review","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://eprints.qut.edu.au/48253/1/CompSur.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046373717","display_name":"Alsayed Algergawy","orcid":"https://orcid.org/0000-0002-8550-4720"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Alsayed Algergawy","raw_affiliation_strings":["Madgeburg University, Madegeburg, Germany"],"affiliations":[{"raw_affiliation_string":"Madgeburg University, Madegeburg, Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089929382","display_name":"Marco Mesiti","orcid":"https://orcid.org/0000-0001-5701-0080"},"institutions":[{"id":"https://openalex.org/I189158943","display_name":"University of Milan","ror":"https://ror.org/00wjc7c48","country_code":"IT","type":"education","lineage":["https://openalex.org/I189158943"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marco Mesiti","raw_affiliation_strings":["University of Milano, Milano, Italy","Univ. of Milano, Milano, Italy#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Milano, Milano, Italy","institution_ids":["https://openalex.org/I189158943"]},{"raw_affiliation_string":"Univ. of Milano, Milano, Italy#TAB#","institution_ids":["https://openalex.org/I189158943"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015158048","display_name":"Richi Nayak","orcid":"https://orcid.org/0000-0002-9954-0159"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Richi Nayak","raw_affiliation_strings":["Queensland University of Technology, Brisbane, Australia"],"affiliations":[{"raw_affiliation_string":"Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042946019","display_name":"Gunter Saake","orcid":"https://orcid.org/0000-0001-9576-8474"},"institutions":[{"id":"https://openalex.org/I4210138551","display_name":"University Hospital Magdeburg","ror":"https://ror.org/03m04df46","country_code":"DE","type":"healthcare","lineage":["https://openalex.org/I4210138551"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gunter Saake","raw_affiliation_strings":["Magdeburg University, Magdeburg, Germany"],"affiliations":[{"raw_affiliation_string":"Magdeburg University, Magdeburg, Germany","institution_ids":["https://openalex.org/I4210138551"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5046373717"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.9513,"has_fulltext":true,"cited_by_count":60,"citation_normalized_percentile":{"value":0.96456494,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"43","issue":"4","first_page":"1","last_page":"41"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8621842861175537},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8155725598335266},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.5714281797409058},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5574994683265686},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5173801183700562},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.50635826587677},{"id":"https://openalex.org/keywords/xml-schema","display_name":"XML Schema (W3C)","score":0.49725964665412903},{"id":"https://openalex.org/keywords/xml-validation","display_name":"XML validation","score":0.4890082776546478},{"id":"https://openalex.org/keywords/fuzzy-clustering","display_name":"Fuzzy clustering","score":0.46645301580429077},{"id":"https://openalex.org/keywords/xml-schema-editor","display_name":"XML Schema Editor","score":0.4563212990760803},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.41118431091308594},{"id":"https://openalex.org/keywords/xml-signature","display_name":"XML Signature","score":0.17236343026161194},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.16238507628440857},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.11786940693855286}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8621842861175537},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8155725598335266},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.5714281797409058},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5574994683265686},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5173801183700562},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.50635826587677},{"id":"https://openalex.org/C40713593","wikidata":"https://www.wikidata.org/wiki/Q16342","display_name":"XML Schema (W3C)","level":5,"score":0.49725964665412903},{"id":"https://openalex.org/C55348073","wikidata":"https://www.wikidata.org/wiki/Q595926","display_name":"XML validation","level":3,"score":0.4890082776546478},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.46645301580429077},{"id":"https://openalex.org/C34716815","wikidata":"https://www.wikidata.org/wiki/Q8042322","display_name":"XML Schema Editor","level":3,"score":0.4563212990760803},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.41118431091308594},{"id":"https://openalex.org/C34330436","wikidata":"https://www.wikidata.org/wiki/Q979532","display_name":"XML Signature","level":4,"score":0.17236343026161194},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.16238507628440857},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.11786940693855286}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1978802.1978804","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1978802.1978804","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},{"id":"pmh:oai:eprints.qut.edu.au:48253","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.qut.edu.au/48253/1/CompSur.pdf","source":{"id":"https://openalex.org/S4306402607","display_name":"QUT ePrints (Queensland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I160993911","host_organization_name":"Queensland University of Technology","host_organization_lineage":["https://openalex.org/I160993911"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM Computing Surveys","raw_type":"Contribution to Journal"},{"id":"pmh:oai:air.unimi.it:2434/171306","is_oa":true,"landing_page_url":"http://hdl.handle.net/2434/171306","pdf_url":"http://hdl.handle.net/2434/171306","source":{"id":"https://openalex.org/S4306400516","display_name":"Archivio Istituzionale della Ricerca (Universita Degli Studi Di Milano)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I189158943","host_organization_name":"University of Milan","host_organization_lineage":["https://openalex.org/I189158943"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:eprints.qut.edu.au:48253","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.qut.edu.au/48253/1/CompSur.pdf","source":{"id":"https://openalex.org/S4306402607","display_name":"QUT ePrints (Queensland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I160993911","host_organization_name":"Queensland University of Technology","host_organization_lineage":["https://openalex.org/I160993911"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM Computing Surveys","raw_type":"Contribution to Journal"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W2116020486.pdf"},"referenced_works_count":116,"referenced_works":["https://openalex.org/W7950900","https://openalex.org/W8870360","https://openalex.org/W58351694","https://openalex.org/W87804463","https://openalex.org/W89761811","https://openalex.org/W97077349","https://openalex.org/W135761774","https://openalex.org/W198468505","https://openalex.org/W564455539","https://openalex.org/W1482673573","https://openalex.org/W1486540001","https://openalex.org/W1486840803","https://openalex.org/W1496147749","https://openalex.org/W1501500081","https://openalex.org/W1505171204","https://openalex.org/W1518221903","https://openalex.org/W1519440323","https://openalex.org/W1522345903","https://openalex.org/W1529522905","https://openalex.org/W1532325895","https://openalex.org/W1542493855","https://openalex.org/W1548171311","https://openalex.org/W1569362407","https://openalex.org/W1575714782","https://openalex.org/W1592758009","https://openalex.org/W1601158495","https://openalex.org/W1619624064","https://openalex.org/W1628571627","https://openalex.org/W1641039719","https://openalex.org/W1646278814","https://openalex.org/W1660390307","https://openalex.org/W1781737156","https://openalex.org/W1844192336","https://openalex.org/W1878684993","https://openalex.org/W1891372937","https://openalex.org/W1965492961","https://openalex.org/W1967921541","https://openalex.org/W1968125506","https://openalex.org/W1971318281","https://openalex.org/W1975009259","https://openalex.org/W1976373002","https://openalex.org/W1977615307","https://openalex.org/W1978478796","https://openalex.org/W1979954747","https://openalex.org/W1987233071","https://openalex.org/W1987995481","https://openalex.org/W1990261244","https://openalex.org/W1991645955","https://openalex.org/W1992419399","https://openalex.org/W1998839018","https://openalex.org/W2004891392","https://openalex.org/W2006636107","https://openalex.org/W2008896880","https://openalex.org/W2016885468","https://openalex.org/W2016973429","https://openalex.org/W2018065175","https://openalex.org/W2021993511","https://openalex.org/W2022078289","https://openalex.org/W2023883421","https://openalex.org/W2024877380","https://openalex.org/W2033365000","https://openalex.org/W2036389305","https://openalex.org/W2046341811","https://openalex.org/W2047260621","https://openalex.org/W2057712948","https://openalex.org/W2085840241","https://openalex.org/W2085885823","https://openalex.org/W2087613132","https://openalex.org/W2089634871","https://openalex.org/W2090347585","https://openalex.org/W2095560923","https://openalex.org/W2095897464","https://openalex.org/W2102247866","https://openalex.org/W2103069411","https://openalex.org/W2107302432","https://openalex.org/W2107412086","https://openalex.org/W2108584692","https://openalex.org/W2109464129","https://openalex.org/W2111110587","https://openalex.org/W2114801888","https://openalex.org/W2121622048","https://openalex.org/W2123772748","https://openalex.org/W2128381480","https://openalex.org/W2130094385","https://openalex.org/W2131687179","https://openalex.org/W2135187880","https://openalex.org/W2138745488","https://openalex.org/W2139135093","https://openalex.org/W2141777121","https://openalex.org/W2142234917","https://openalex.org/W2144405306","https://openalex.org/W2149230623","https://openalex.org/W2150908245","https://openalex.org/W2150926065","https://openalex.org/W2153233077","https://openalex.org/W2156543375","https://openalex.org/W2157060173","https://openalex.org/W2160873337","https://openalex.org/W2165286227","https://openalex.org/W2165612380","https://openalex.org/W2256258350","https://openalex.org/W2335920051","https://openalex.org/W2487224391","https://openalex.org/W2501627144","https://openalex.org/W2517878783","https://openalex.org/W2580302247","https://openalex.org/W2737168042","https://openalex.org/W3021753307","https://openalex.org/W3139328003","https://openalex.org/W4213009331","https://openalex.org/W4239820202","https://openalex.org/W4372267129","https://openalex.org/W6600367688","https://openalex.org/W6607449312","https://openalex.org/W6628988919","https://openalex.org/W6637101025"],"related_works":["https://openalex.org/W1970085714","https://openalex.org/W2376675693","https://openalex.org/W2013920494","https://openalex.org/W2378161351","https://openalex.org/W2146760738","https://openalex.org/W2026908501","https://openalex.org/W1533091132","https://openalex.org/W2585052215","https://openalex.org/W56340752","https://openalex.org/W2232393585"],"abstract_inverted_index":{"In":[0,60,101],"the":[1,36,40,50,66,83,92,98,112,150,154],"last":[2],"few":[3],"years":[4],"we":[5,63,75,104],"have":[6],"observed":[7],"a":[8,28,108,138],"proliferation":[9],"of":[10,26,31,42,52,86,156],"approaches":[11,32,77,114],"for":[12],"clustering":[13,41,71,99,134,140,147],"XML":[14,43,132,146],"documents":[15],"and":[16,22,58,96,118,142,159],"schemas":[17],"based":[18],"on":[19,82,91,97],"their":[20],"structure":[21],"content.":[23],"The":[24],"presence":[25],"such":[27],"huge":[29],"amount":[30],"is":[33,72,128],"due":[34],"to":[35,106,165],"different":[37],"applications":[38,46],"requiring":[39],"data.":[44],"These":[45],"need":[47,164],"data":[48,87,133],"in":[49,69,110],"form":[51],"similar":[53],"contents,":[54],"tags,":[55],"paths,":[56],"structures,":[57],"semantics.":[59],"this":[61,102],"article,":[62],"first":[64],"outline":[65],"application":[67],"contexts":[68],"which":[70,111],"useful,":[73],"then":[74],"survey":[76],"so":[78],"far":[79],"proposed":[80],"relying":[81],"abstract":[84],"representation":[85],"(instances":[88],"or":[89],"schema),":[90],"identified":[93],"similarity":[94],"measure,":[95],"algorithm.":[100],"presentation,":[103],"aim":[105,121],"draw":[107],"taxonomy":[109],"current":[113],"can":[115],"be":[116,166],"classified":[117],"compared.":[119],"We":[120],"at":[122],"introducing":[123],"an":[124,145],"integrated":[125],"view":[126],"that":[127,162],"useful":[129],"when":[130,136,143],"comparing":[131],"approaches,":[135],"developing":[137],"new":[139],"algorithm,":[141],"implementing":[144],"component.":[148],"Finally,":[149],"article":[151],"moves":[152],"into":[153],"description":[155],"future":[157],"trends":[158],"research":[160],"issues":[161],"still":[163],"faced.":[167]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":11},{"year":2015,"cited_by_count":8},{"year":2014,"cited_by_count":9},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
