{"id":"https://openalex.org/W2057211779","doi":"https://doi.org/10.1142/s0218213005002326","title":"XML CLUSTERING AND RETRIEVAL THROUGH PRINCIPAL COMPONENT ANALYSIS","display_name":"XML CLUSTERING AND RETRIEVAL THROUGH PRINCIPAL COMPONENT ANALYSIS","publication_year":2005,"publication_date":"2005-07-25","ids":{"openalex":"https://openalex.org/W2057211779","doi":"https://doi.org/10.1142/s0218213005002326","mag":"2057211779"},"language":"en","primary_location":{"id":"doi:10.1142/s0218213005002326","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218213005002326","pdf_url":null,"source":{"id":"https://openalex.org/S178780388","display_name":"International Journal of Artificial Intelligence Tools","issn_l":"0218-2130","issn":["0218-2130","1793-6349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal on Artificial Intelligence Tools","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100378977","display_name":"Jason T. L. Wang","orcid":"https://orcid.org/0000-0002-2486-1097"},"institutions":[{"id":"https://openalex.org/I118118575","display_name":"New Jersey Institute of Technology","ror":"https://ror.org/05e74xb87","country_code":"US","type":"education","lineage":["https://openalex.org/I118118575"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"JASON T. L. WANG","raw_affiliation_strings":["Department of Computer Science,  New Jersey Institute of Technology, University Heights,  Newark, NJ 07102, USA","Department of Computer Science, New Jersey Institute of Technology, University Heights, Newark, NJ, 07102, USA,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science,  New Jersey Institute of Technology, University Heights,  Newark, NJ 07102, USA","institution_ids":["https://openalex.org/I118118575"]},{"raw_affiliation_string":"Department of Computer Science, New Jersey Institute of Technology, University Heights, Newark, NJ, 07102, USA,","institution_ids":["https://openalex.org/I118118575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101920817","display_name":"Jianghui Liu","orcid":"https://orcid.org/0000-0003-4201-4913"},"institutions":[{"id":"https://openalex.org/I118118575","display_name":"New Jersey Institute of Technology","ror":"https://ror.org/05e74xb87","country_code":"US","type":"education","lineage":["https://openalex.org/I118118575"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"JIANGHUI LIU","raw_affiliation_strings":["Department of Computer Science,  New Jersey Institute of Technology, University Heights,  Newark, NJ 07102, USA","Department of Computer Science, New Jersey Institute of Technology, University Heights, Newark, NJ, 07102, USA,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science,  New Jersey Institute of Technology, University Heights,  Newark, NJ 07102, USA","institution_ids":["https://openalex.org/I118118575"]},{"raw_affiliation_string":"Department of Computer Science, New Jersey Institute of Technology, University Heights, Newark, NJ, 07102, USA,","institution_ids":["https://openalex.org/I118118575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112793871","display_name":"Junhan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I118118575","display_name":"New Jersey Institute of Technology","ror":"https://ror.org/05e74xb87","country_code":"US","type":"education","lineage":["https://openalex.org/I118118575"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"JUNHAN WANG","raw_affiliation_strings":["Department of Computer Science,  New Jersey Institute of Technology, University Heights,  Newark, NJ 07102, USA","Department of Computer Science, New Jersey Institute of Technology, University Heights, Newark, NJ, 07102, USA,"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science,  New Jersey Institute of Technology, University Heights,  Newark, NJ 07102, USA","institution_ids":["https://openalex.org/I118118575"]},{"raw_affiliation_string":"Department of Computer Science, New Jersey Institute of Technology, University Heights, Newark, NJ, 07102, USA,","institution_ids":["https://openalex.org/I118118575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I118118575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.16497577,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"14","issue":"04","first_page":"683","last_page":"699"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8772611021995544},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6750797033309937},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.6152530908584595},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6011344790458679},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.5818844437599182},{"id":"https://openalex.org/keywords/xml-validation","display_name":"XML validation","score":0.5247328877449036},{"id":"https://openalex.org/keywords/efficient-xml-interchange","display_name":"Efficient XML Interchange","score":0.501054048538208},{"id":"https://openalex.org/keywords/vector-space-model","display_name":"Vector space model","score":0.4945227801799774},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.48729026317596436},{"id":"https://openalex.org/keywords/principal-component-analysis","display_name":"Principal component analysis","score":0.46468237042427063},{"id":"https://openalex.org/keywords/xml-database","display_name":"XML database","score":0.45823293924331665},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19848260283470154},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.12976211309432983}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8772611021995544},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6750797033309937},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.6152530908584595},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6011344790458679},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.5818844437599182},{"id":"https://openalex.org/C55348073","wikidata":"https://www.wikidata.org/wiki/Q595926","display_name":"XML validation","level":3,"score":0.5247328877449036},{"id":"https://openalex.org/C11508877","wikidata":"https://www.wikidata.org/wiki/Q1124477","display_name":"Efficient XML Interchange","level":3,"score":0.501054048538208},{"id":"https://openalex.org/C89686163","wikidata":"https://www.wikidata.org/wiki/Q1187982","display_name":"Vector space model","level":2,"score":0.4945227801799774},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.48729026317596436},{"id":"https://openalex.org/C27438332","wikidata":"https://www.wikidata.org/wiki/Q2873","display_name":"Principal component analysis","level":2,"score":0.46468237042427063},{"id":"https://openalex.org/C183068750","wikidata":"https://www.wikidata.org/wiki/Q357393","display_name":"XML database","level":3,"score":0.45823293924331665},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19848260283470154},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.12976211309432983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s0218213005002326","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218213005002326","pdf_url":null,"source":{"id":"https://openalex.org/S178780388","display_name":"International Journal of Artificial Intelligence Tools","issn_l":"0218-2130","issn":["0218-2130","1793-6349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal on Artificial Intelligence Tools","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W208128215","https://openalex.org/W1976373002","https://openalex.org/W1987822086","https://openalex.org/W1996760268","https://openalex.org/W2028936054","https://openalex.org/W2094034674","https://openalex.org/W2128695242","https://openalex.org/W4252559437"],"related_works":["https://openalex.org/W2392613534","https://openalex.org/W2073190017","https://openalex.org/W1515677070","https://openalex.org/W19863596","https://openalex.org/W2592236573","https://openalex.org/W1562732971","https://openalex.org/W2540896671","https://openalex.org/W2119482581","https://openalex.org/W2361533086","https://openalex.org/W2001121861"],"abstract_inverted_index":{"XML":[0,28,40,147,157],"is":[1,67],"increasingly":[2],"important":[3],"in":[4,18,84,96,116],"data":[5,158],"exchange":[6],"and":[7,26,78,112,163],"information":[8],"management.":[9],"A":[10],"great":[11],"deal":[12],"of":[13,93,104,135,143,166],"efforts":[14],"have":[15],"been":[16],"spent":[17],"developing":[19],"efficient":[20],"techniques":[21,145],"for":[22,59],"storing,":[23],"querying,":[24],"indexing":[25],"accessing":[27],"documents.":[29,98],"In":[30,42],"this":[31],"paper":[32],"we":[33],"propose":[34],"a":[35,85],"new":[36],"approach":[37,66],"to":[38,44,68,82,125,146],"clustering":[39],"data.":[41],"contrast":[43],"previous":[45],"work,":[46],"which":[47],"focused":[48],"on":[49,90,152],"documents":[50,60,81,153],"defined":[51],"by":[52,74,107],"different":[53],"DTDs,":[54],"the":[55,62,80,91,94,97,102,105,114,117,133,136,161,167],"proposed":[56,168],"method":[57],"works":[58],"with":[61,128],"same":[63],"DTD.":[64],"Our":[65],"extract":[69],"features":[70,95],"from":[71,155],"documents,":[72],"modeled":[73],"ordered":[75],"labeled":[76],"trees,":[77],"transform":[79],"vectors":[83,106,115,127],"high-dimensional":[86],"Euclidean":[87],"space":[88],"based":[89,151],"occurrences":[92],"We":[99,138],"then":[100],"reduce":[101],"dimensionality":[103],"principal":[108],"component":[109],"analysis":[110],"(PCA)":[111],"cluster":[113],"reduced":[118],"dimensional":[119],"space.":[120],"The":[121],"PCA":[122],"enables":[123],"one":[124],"identify":[126],"co-occurrent":[129],"features,":[130],"thereby":[131],"enhancing":[132],"accuracy":[134],"clustering.":[137],"also":[139],"discuss":[140],"an":[141],"extension":[142],"our":[144],"retrieval.":[148],"Experimental":[149],"results":[150],"obtained":[154],"Wisconsin's":[156],"bank":[159],"show":[160],"effectiveness":[162],"good":[164],"performance":[165],"techniques.":[169]},"counts_by_year":[{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
