{"id":"https://openalex.org/W2040221623","doi":"https://doi.org/10.1080/15427951.2006.10129133","title":"Cluster Generation and Labeling for Web Snippets: A Fast, Accurate Hierarchical Solution","display_name":"Cluster Generation and Labeling for Web Snippets: A Fast, Accurate Hierarchical Solution","publication_year":2006,"publication_date":"2006-01-01","ids":{"openalex":"https://openalex.org/W2040221623","doi":"https://doi.org/10.1080/15427951.2006.10129133","mag":"2040221623"},"language":"en","primary_location":{"id":"doi:10.1080/15427951.2006.10129133","is_oa":true,"landing_page_url":"https://doi.org/10.1080/15427951.2006.10129133","pdf_url":"https://www.internetmathematicsjournal.com/article/1430.pdf","source":{"id":"https://openalex.org/S19474917","display_name":"Internet Mathematics","issn_l":"1542-7951","issn":["1542-7951","1944-9488"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Internet Mathematics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.internetmathematicsjournal.com/article/1430.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033343264","display_name":"Filippo Geraci","orcid":"https://orcid.org/0000-0001-6993-6761"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Filippo Geraci","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028551838","display_name":"Marco Pellegrini","orcid":"https://orcid.org/0000-0003-3151-9481"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marco Pellegrini","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080411641","display_name":"Marco Maggini","orcid":"https://orcid.org/0000-0002-6428-1265"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marco Maggini","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5063975186","display_name":"Fabrizio Sebastiani","orcid":"https://orcid.org/0000-0003-4221-6427"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fabrizio Sebastiani","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5033343264"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":11.1884,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.97817532,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"3","issue":"4","first_page":"413","last_page":"443"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7691693305969238},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7646790742874146},{"id":"https://openalex.org/keywords/snippet","display_name":"Snippet","score":0.6482419967651367},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6343194246292114},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5480738282203674},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5441362857818604},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.5314074158668518},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.47966814041137695},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.4620171785354614},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4468492269515991},{"id":"https://openalex.org/keywords/directory","display_name":"Directory","score":0.4301111102104187},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3419830799102783},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2055613100528717}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7691693305969238},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7646790742874146},{"id":"https://openalex.org/C2777822670","wikidata":"https://www.wikidata.org/wiki/Q1120538","display_name":"Snippet","level":2,"score":0.6482419967651367},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6343194246292114},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5480738282203674},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5441362857818604},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.5314074158668518},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.47966814041137695},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.4620171785354614},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4468492269515991},{"id":"https://openalex.org/C2777683733","wikidata":"https://www.wikidata.org/wiki/Q201456","display_name":"Directory","level":2,"score":0.4301111102104187},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3419830799102783},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2055613100528717},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1080/15427951.2006.10129133","is_oa":true,"landing_page_url":"https://doi.org/10.1080/15427951.2006.10129133","pdf_url":"https://www.internetmathematicsjournal.com/article/1430.pdf","source":{"id":"https://openalex.org/S19474917","display_name":"Internet Mathematics","issn_l":"1542-7951","issn":["1542-7951","1944-9488"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Internet Mathematics","raw_type":"journal-article"},{"id":"pmh:oai:CULeuclid:euclid.im/1227025008","is_oa":false,"landing_page_url":"http://projecteuclid.org/euclid.im/1227025008","pdf_url":null,"source":{"id":"https://openalex.org/S4306400787","display_name":"Project Euclid (Cornell University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"pmh:oai:dnet:people______::147a1f4c559cf35f41f496b4eea803ab","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S7407055261","display_name":"ISTI Open Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"INTERNET MATHEMATICS, vol. 3 (issue 4), pp. 413-444","raw_type":"Journal article"},{"id":"pmh:oai:usiena-air.unisi.it:11365/29631","is_oa":false,"landing_page_url":"http://hdl.handle.net/11365/29631","pdf_url":null,"source":{"id":"https://openalex.org/S4377196319","display_name":"Use Siena air (University of Siena)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I102064193","host_organization_name":"University of Siena","host_organization_lineage":["https://openalex.org/I102064193"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1080/15427951.2006.10129133","is_oa":true,"landing_page_url":"https://doi.org/10.1080/15427951.2006.10129133","pdf_url":"https://www.internetmathematicsjournal.com/article/1430.pdf","source":{"id":"https://openalex.org/S19474917","display_name":"Internet Mathematics","issn_l":"1542-7951","issn":["1542-7951","1944-9488"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Internet Mathematics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5400000214576721,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2040221623.pdf","grobid_xml":"https://content.openalex.org/works/W2040221623.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W115508274","https://openalex.org/W190289764","https://openalex.org/W192724328","https://openalex.org/W285471286","https://openalex.org/W1487761086","https://openalex.org/W1489608363","https://openalex.org/W1500092210","https://openalex.org/W1521439890","https://openalex.org/W1522930108","https://openalex.org/W1530232144","https://openalex.org/W1565727378","https://openalex.org/W1660390307","https://openalex.org/W1973264045","https://openalex.org/W1975152892","https://openalex.org/W1980317569","https://openalex.org/W1983197449","https://openalex.org/W1996764654","https://openalex.org/W2001907516","https://openalex.org/W2012833704","https://openalex.org/W2038519936","https://openalex.org/W2046598060","https://openalex.org/W2073849744","https://openalex.org/W2074449313","https://openalex.org/W2075880171","https://openalex.org/W2095897464","https://openalex.org/W2099111195","https://openalex.org/W2100958137","https://openalex.org/W2104890789","https://openalex.org/W2110441437","https://openalex.org/W2125095221","https://openalex.org/W2126802698","https://openalex.org/W2127218421","https://openalex.org/W2141729166","https://openalex.org/W2143079043","https://openalex.org/W2147159144","https://openalex.org/W2150593711","https://openalex.org/W2155194165","https://openalex.org/W2160799467","https://openalex.org/W2162495634","https://openalex.org/W2435251607","https://openalex.org/W4230940751","https://openalex.org/W4236140805","https://openalex.org/W4241122026","https://openalex.org/W4243009576","https://openalex.org/W4252666170","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W1484298060","https://openalex.org/W3169572532","https://openalex.org/W2176930778","https://openalex.org/W4237492828","https://openalex.org/W2794433798","https://openalex.org/W78181647","https://openalex.org/W1716487511","https://openalex.org/W2130194910","https://openalex.org/W2121996546","https://openalex.org/W2189374779"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"Armil,":[3],"a":[4,31,56,102,126,155,190,204],"meta-search":[5],"engine":[6],"that":[7],"groups":[8],"the":[9,28,36,46,60,66,69,75,80,84,106,129,136,143,162,187,196,199],"web":[10,149],"snippets":[11,81,159,226],"returned":[12],"by":[13,25,77,83,99,117,182],"auxiliary":[14,85],"search":[15,86],"engines":[16],"into":[17],"disjoint":[18],"labeled":[19],"clusters.":[20],"The":[21],"cluster":[22,40,53,200],"labels":[23],"generated":[24],"Armil":[26,140,177,216],"provide":[27],"user":[29,192],"with":[30,212],"compact":[32],"guide":[33],"to":[34,41,168,224],"assessing":[35],"relevance":[37],"of":[38,62,94,101,105,128,139,158,174,189,194,222],"each":[39],"his/her":[42],"information":[43,130],"need.":[44],"Striking":[45],"right":[47],"balance":[48],"between":[49],"running":[50],"time":[51],"and":[52,68,88,120,198,219],"well-formedness":[54],"was":[55],"key":[57],"point":[58],"in":[59,148,227],"design":[61],"our":[63],"system.":[64],"Both":[65],"clustering":[67,137,175,197,218],"labeling":[70,114,201,220],"tasks":[71],"are":[72],"performed":[73,98],"on":[74,125],"fly":[76],"processing":[78],"only":[79],"provided":[82],"engines,":[87],"they":[89],"use":[90],"no":[91],"external":[92],"sources":[93],"knowledge.":[95],"Clustering":[96],"is":[97,115],"means":[100],"fast":[103],"version":[104],"furthest-pointfirst":[107],"algorithm":[108],"for":[109],"metric":[110],"_k_-center":[111],"clustering.":[112],"Cluster":[113],"achieved":[116],"combining":[118],"intra-cluster":[119],"inter-cluster":[121],"term":[122],"extraction":[123],"based":[124],"variant":[127],"gain":[131],"measure.":[132],"We":[133,184],"have":[134],"tested":[135],"effectiveness":[138],"against":[141],"Vivisimo,":[142],"_de":[144],"facto_":[145],"industrial":[146],"standard":[147,205],"snippet":[150],"clustering,":[151],"using":[152],"as":[153],"benchmark":[154],"comprehensive":[156],"set":[157],"obtained":[160],"from":[161],"Open":[163],"Directory":[164],"Project":[165],"hierarchy.":[166],"According":[167],"two":[169],"widely":[170],"accepted":[171],"\"external\"":[172],"metrics":[173],"quality,":[176],"achieves":[178],"better":[179],"performance":[180],"levels":[181],"10%.":[183],"also":[185],"report":[186],"results":[188],"thorough":[191],"evaluation":[193],"both":[195],"algorithms.":[202],"On":[203],"desktop":[206],"PC":[207],"(AMD":[208],"Athlon":[209],"1-Ghz":[210],"Clock":[211],"750":[213],"Mbytes":[214],"RAM),":[215],"performs":[217],"altogether":[221],"up":[223],"200":[225],"less":[228],"than":[229],"one":[230],"second.":[231]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":6}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
