{"id":"https://openalex.org/W2250246399","doi":"https://doi.org/10.3115/v1/e14-4001","title":"Easy Web Search Results Clustering: When Baselines Can Reach State-of-the-Art Algorithms","display_name":"Easy Web Search Results Clustering: When Baselines Can Reach State-of-the-Art Algorithms","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2250246399","doi":"https://doi.org/10.3115/v1/e14-4001","mag":"2250246399"},"language":"en","primary_location":{"id":"doi:10.3115/v1/e14-4001","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/e14-4001","pdf_url":"https://aclanthology.org/E14-4001.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, volume 2: Short Papers","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/E14-4001.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027337246","display_name":"Jos\u00e9 G. Moreno","orcid":"https://orcid.org/0000-0002-8852-5797"},"institutions":[{"id":"https://openalex.org/I4210139483","display_name":"GREYC","ror":"https://ror.org/043749971","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I194210350","https://openalex.org/I4210105918","https://openalex.org/I4210105918","https://openalex.org/I4210139483","https://openalex.org/I4210159245","https://openalex.org/I98702875"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Jose G. Moreno","raw_affiliation_strings":["Equipe Hultech - Laboratoire GREYC - UMR6072 (France)"],"affiliations":[{"raw_affiliation_string":"Equipe Hultech - Laboratoire GREYC - UMR6072 (France)","institution_ids":["https://openalex.org/I4210139483"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043440123","display_name":"Ga\u00ebl Dias","orcid":"https://orcid.org/0000-0002-5840-1603"},"institutions":[{"id":"https://openalex.org/I4210139483","display_name":"GREYC","ror":"https://ror.org/043749971","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I194210350","https://openalex.org/I4210105918","https://openalex.org/I4210105918","https://openalex.org/I4210139483","https://openalex.org/I4210159245","https://openalex.org/I98702875"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ga\u00ebl Dias","raw_affiliation_strings":["Equipe Hultech - Laboratoire GREYC - UMR6072 (France)","GREYC - Groupe de Recherche en Informatique, Image et Instrumentation de Caen (Boulevard du Mar\u00e9chal Juin - 14050 CAEN Cedex - France)"],"affiliations":[{"raw_affiliation_string":"Equipe Hultech - Laboratoire GREYC - UMR6072 (France)","institution_ids":["https://openalex.org/I4210139483"]},{"raw_affiliation_string":"GREYC - Groupe de Recherche en Informatique, Image et Instrumentation de Caen (Boulevard du Mar\u00e9chal Juin - 14050 CAEN Cedex - France)","institution_ids":["https://openalex.org/I4210139483"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5027337246"],"corresponding_institution_ids":["https://openalex.org/I4210139483"],"apc_list":null,"apc_paid":null,"fwci":2.4208367,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.9167507,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.838093638420105},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.804757833480835},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.7699465751647949},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7253557443618774},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5859532952308655},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4862869679927826},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4409297704696655},{"id":"https://openalex.org/keywords/cascade","display_name":"Cascade","score":0.4407517910003662},{"id":"https://openalex.org/keywords/state-of-art","display_name":"State of art","score":0.41747233271598816},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3508630692958832},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.22320160269737244},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.1287814974784851},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07464301586151123}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.838093638420105},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.804757833480835},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.7699465751647949},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7253557443618774},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5859532952308655},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4862869679927826},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4409297704696655},{"id":"https://openalex.org/C34146451","wikidata":"https://www.wikidata.org/wiki/Q5048094","display_name":"Cascade","level":2,"score":0.4407517910003662},{"id":"https://openalex.org/C3018574109","wikidata":"https://www.wikidata.org/wiki/Q329338","display_name":"State of art","level":2,"score":0.41747233271598816},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3508630692958832},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.22320160269737244},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.1287814974784851},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07464301586151123},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C42360764","wikidata":"https://www.wikidata.org/wiki/Q83588","display_name":"Chemical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3115/v1/e14-4001","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/e14-4001","pdf_url":"https://aclanthology.org/E14-4001.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, volume 2: Short Papers","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.3115/v1/e14-4001","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/e14-4001","pdf_url":"https://aclanthology.org/E14-4001.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, volume 2: Short Papers","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals","score":0.4699999988079071}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2250246399.pdf","grobid_xml":"https://content.openalex.org/works/W2250246399.grobid-xml"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W1533168581","https://openalex.org/W1607159943","https://openalex.org/W1861576520","https://openalex.org/W2003374509","https://openalex.org/W2021457058","https://openalex.org/W2030470432","https://openalex.org/W2072240081","https://openalex.org/W2074449313","https://openalex.org/W2100958137","https://openalex.org/W2105068202","https://openalex.org/W2110441437","https://openalex.org/W2142656488","https://openalex.org/W2171040110"],"related_works":["https://openalex.org/W2153719181","https://openalex.org/W1971748923","https://openalex.org/W1566155057","https://openalex.org/W2060986072","https://openalex.org/W2052574922","https://openalex.org/W64588465","https://openalex.org/W3120641340","https://openalex.org/W2117825986","https://openalex.org/W3134067061","https://openalex.org/W2079855347"],"abstract_inverted_index":{"This":[0],"work":[1,22],"discusses":[2],"the":[3,39],"evaluation":[4],"of\r\nbaseline":[5],"algorithms":[6],"for":[7],"Web":[8],"search":[9],"results\r\nclustering.":[10],"An":[11],"analysis":[12],"is":[13],"performed\r\nover":[14],"frequently":[15],"used":[16],"baseline":[17],"algorithms\r\nand":[18],"standard":[19],"datasets.":[20],"Our":[21],"shows\r\nthat":[23],"competitive":[24],"results":[25,51],"can":[26,42],"be":[27],"obtained":[28],"by\r\neither":[29],"fine":[30],"tuning":[31],"or":[32],"performing":[33],"cascade\r\nclustering":[34],"over":[35],"well-known":[36],"algorithms.":[37,55],"In\r\nparticular,":[38],"latter":[40],"strategy":[41],"lead":[43],"to\r\na":[44],"scalable":[45],"and":[46],"real-world":[47],"solution,":[48],"which\r\nevidences":[49],"comparative":[50],"to":[52],"recent\r\ntext-based":[53],"state-of-the-art":[54]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
