{"id":"https://openalex.org/W1530666358","doi":"https://doi.org/10.1007/978-3-540-73888-6_46","title":"Unsupervised Classification of Text-Centric XML Document Collections","display_name":"Unsupervised Classification of Text-Centric XML Document Collections","publication_year":2007,"publication_date":"2007-08-18","ids":{"openalex":"https://openalex.org/W1530666358","doi":"https://doi.org/10.1007/978-3-540-73888-6_46","mag":"1530666358"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-540-73888-6_46","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-540-73888-6_46","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033491986","display_name":"Antoine Doucet","orcid":"https://orcid.org/0000-0001-6160-3356"},"institutions":[{"id":"https://openalex.org/I2800229700","display_name":"Helsinki Institute of Physics","ror":"https://ror.org/01x2x1522","country_code":"FI","type":"facility","lineage":["https://openalex.org/I133731052","https://openalex.org/I2800229700","https://openalex.org/I63548447","https://openalex.org/I94722563","https://openalex.org/I9927081"]},{"id":"https://openalex.org/I2802519937","display_name":"Institut de Recherche en Informatique et Syst\u00e8mes Al\u00e9atoires","ror":"https://ror.org/00myn0z94","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I205703379","https://openalex.org/I2802204017","https://openalex.org/I2802519937","https://openalex.org/I28221208","https://openalex.org/I4210127572","https://openalex.org/I4210159245","https://openalex.org/I56067802"]},{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI","FR"],"is_corresponding":true,"raw_author_name":"Antoine Doucet","raw_affiliation_strings":["Department of Computer Science, P. O. Box 68 (Gustaf H\u00e4llstr\u00f6min katu 2b), FI\u201300014 University of Helsinki, Finland","IRISA-INRIA, Campus de Beaulieu, F-35042 Rennes Cedex, France","Department of Computer Science, P. O. Box 68 (Gustaf H\u00e4llstr\u00f6min katu 2b), FI\u201300014 University of Helsinki","IRISA-INRIA, Campus de Beaulieu, F-35042 Rennes Cedex"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, P. O. Box 68 (Gustaf H\u00e4llstr\u00f6min katu 2b), FI\u201300014 University of Helsinki, Finland","institution_ids":["https://openalex.org/I2800229700","https://openalex.org/I133731052"]},{"raw_affiliation_string":"IRISA-INRIA, Campus de Beaulieu, F-35042 Rennes Cedex, France","institution_ids":["https://openalex.org/I1326498283","https://openalex.org/I2802519937"]},{"raw_affiliation_string":"Department of Computer Science, P. O. Box 68 (Gustaf H\u00e4llstr\u00f6min katu 2b), FI\u201300014 University of Helsinki","institution_ids":["https://openalex.org/I133731052"]},{"raw_affiliation_string":"IRISA-INRIA, Campus de Beaulieu, F-35042 Rennes Cedex","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051929441","display_name":"Miro Lehtonen","orcid":null},"institutions":[{"id":"https://openalex.org/I2800229700","display_name":"Helsinki Institute of Physics","ror":"https://ror.org/01x2x1522","country_code":"FI","type":"facility","lineage":["https://openalex.org/I133731052","https://openalex.org/I2800229700","https://openalex.org/I63548447","https://openalex.org/I94722563","https://openalex.org/I9927081"]},{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Miro Lehtonen","raw_affiliation_strings":["Department of Computer Science, P. O. Box 68 (Gustaf H\u00e4llstr\u00f6min katu 2b), FI\u201300014 University of Helsinki, Finland","Department of Computer Science, P. O. Box 68 (Gustaf H\u00e4llstr\u00f6min katu 2b), FI\u201300014 University of Helsinki"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, P. O. Box 68 (Gustaf H\u00e4llstr\u00f6min katu 2b), FI\u201300014 University of Helsinki, Finland","institution_ids":["https://openalex.org/I2800229700","https://openalex.org/I133731052"]},{"raw_affiliation_string":"Department of Computer Science, P. O. Box 68 (Gustaf H\u00e4llstr\u00f6min katu 2b), FI\u201300014 University of Helsinki","institution_ids":["https://openalex.org/I133731052"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5033491986"],"corresponding_institution_ids":["https://openalex.org/I1326498283","https://openalex.org/I133731052","https://openalex.org/I2800229700","https://openalex.org/I2802519937"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":null,"fwci":1.832,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.87063931,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"497","last_page":"509"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8981526494026184},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.738711416721344},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.696026086807251},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.6574903130531311},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.6188204288482666},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6037746071815491},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.5914186239242554},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.4355146288871765},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4214290380477905},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4090608060359955},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34489691257476807},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.17118322849273682}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8981526494026184},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.738711416721344},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.696026086807251},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.6574903130531311},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.6188204288482666},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6037746071815491},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.5914186239242554},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.4355146288871765},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4214290380477905},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4090608060359955},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34489691257476807},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.17118322849273682},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/978-3-540-73888-6_46","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-540-73888-6_46","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5899999737739563,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W50359972","https://openalex.org/W154511698","https://openalex.org/W178859989","https://openalex.org/W1532901860","https://openalex.org/W1592758009","https://openalex.org/W1628571627","https://openalex.org/W1892314125","https://openalex.org/W1920489833","https://openalex.org/W1974862676","https://openalex.org/W2031340710","https://openalex.org/W2041565863","https://openalex.org/W2045117606","https://openalex.org/W2081429741","https://openalex.org/W2084370216","https://openalex.org/W2134222424","https://openalex.org/W2145036943","https://openalex.org/W2184757518","https://openalex.org/W2482331001","https://openalex.org/W2561675875","https://openalex.org/W2997501009"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W1590307681","https://openalex.org/W2536018345","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W2353836703"],"abstract_inverted_index":null,"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
