{"id":"https://openalex.org/W2757490538","doi":"https://doi.org/10.18653/v1/w17-4211","title":"Unsupervised Event Clustering and Aggregation from Newswire and Web Articles","display_name":"Unsupervised Event Clustering and Aggregation from Newswire and Web Articles","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2757490538","doi":"https://doi.org/10.18653/v1/w17-4211","mag":"2757490538"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w17-4211","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-4211","pdf_url":"https://www.aclweb.org/anthology/W17-4211.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 EMNLP Workshop: Natural Language Processing\n          meets Journalism","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W17-4211.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003763581","display_name":"Swen Ribeiro","orcid":null},"institutions":[{"id":"https://openalex.org/I102197404","display_name":"Universit\u00e9 Paris-Sud","ror":"https://ror.org/028rypz17","country_code":"FR","type":"education","lineage":["https://openalex.org/I102197404"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I2738703131","display_name":"Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives","ror":"https://ror.org/00jjx8s55","country_code":"FR","type":"government","lineage":["https://openalex.org/I2738703131"]},{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]},{"id":"https://openalex.org/I4210085861","display_name":"Laboratoire d'Int\u00e9gration des Syst\u00e8mes et des Technologies","ror":"https://ror.org/000dbcc61","country_code":"FR","type":"government","lineage":["https://openalex.org/I2738703131","https://openalex.org/I2738703131","https://openalex.org/I277688954","https://openalex.org/I4210085861","https://openalex.org/I4210117989"]},{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Swen Ribeiro","raw_affiliation_strings":["Universit\u00e9 Paris-Saclay","CEA, LIST, Gif-sur-Yvette, F-91191 France","LIMSI, CNRS Univ. Paris-Sud","CNRS Univ. Paris-Sud"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Paris-Saclay","institution_ids":["https://openalex.org/I277688954"]},{"raw_affiliation_string":"CEA, LIST, Gif-sur-Yvette, F-91191 France","institution_ids":["https://openalex.org/I2738703131","https://openalex.org/I4210085861"]},{"raw_affiliation_string":"LIMSI, CNRS Univ. Paris-Sud","institution_ids":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485"]},{"raw_affiliation_string":"CNRS Univ. Paris-Sud","institution_ids":["https://openalex.org/I102197404","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088565202","display_name":"Olivier Ferret","orcid":"https://orcid.org/0000-0003-0755-2361"},"institutions":[{"id":"https://openalex.org/I102197404","display_name":"Universit\u00e9 Paris-Sud","ror":"https://ror.org/028rypz17","country_code":"FR","type":"education","lineage":["https://openalex.org/I102197404"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I2738703131","display_name":"Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives","ror":"https://ror.org/00jjx8s55","country_code":"FR","type":"government","lineage":["https://openalex.org/I2738703131"]},{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]},{"id":"https://openalex.org/I4210085861","display_name":"Laboratoire d'Int\u00e9gration des Syst\u00e8mes et des Technologies","ror":"https://ror.org/000dbcc61","country_code":"FR","type":"government","lineage":["https://openalex.org/I2738703131","https://openalex.org/I2738703131","https://openalex.org/I277688954","https://openalex.org/I4210085861","https://openalex.org/I4210117989"]},{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Olivier Ferret","raw_affiliation_strings":["LIMSI, CNRS Univ. Paris-Sud","CEA, LIST, Gif-sur-Yvette, F-91191 France","CNRS Univ. Paris-Sud","Universit\u00e9 Paris-Saclay"],"affiliations":[{"raw_affiliation_string":"LIMSI, CNRS Univ. Paris-Sud","institution_ids":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485"]},{"raw_affiliation_string":"CEA, LIST, Gif-sur-Yvette, F-91191 France","institution_ids":["https://openalex.org/I2738703131","https://openalex.org/I4210085861"]},{"raw_affiliation_string":"CNRS Univ. Paris-Sud","institution_ids":["https://openalex.org/I102197404","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"Universit\u00e9 Paris-Saclay","institution_ids":["https://openalex.org/I277688954"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056834851","display_name":"Xavier Tannier","orcid":"https://orcid.org/0000-0002-2452-8868"},"institutions":[{"id":"https://openalex.org/I102197404","display_name":"Universit\u00e9 Paris-Sud","ror":"https://ror.org/028rypz17","country_code":"FR","type":"education","lineage":["https://openalex.org/I102197404"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I2738703131","display_name":"Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives","ror":"https://ror.org/00jjx8s55","country_code":"FR","type":"government","lineage":["https://openalex.org/I2738703131"]},{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]},{"id":"https://openalex.org/I4210085861","display_name":"Laboratoire d'Int\u00e9gration des Syst\u00e8mes et des Technologies","ror":"https://ror.org/000dbcc61","country_code":"FR","type":"government","lineage":["https://openalex.org/I2738703131","https://openalex.org/I2738703131","https://openalex.org/I277688954","https://openalex.org/I4210085861","https://openalex.org/I4210117989"]},{"id":"https://openalex.org/I4210115485","display_name":"Laboratoire d'Informatique pour la M\u00e9canique et les Sciences de l'Ing\u00e9nieur","ror":"https://ror.org/01raq4x89","country_code":"FR","type":"facility","lineage":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Xavier Tannier","raw_affiliation_strings":["CEA, LIST, Gif-sur-Yvette, F-91191 France","CNRS Univ. Paris-Sud","LIMSI, CNRS Univ. Paris-Sud","Universit\u00e9 Paris-Saclay"],"affiliations":[{"raw_affiliation_string":"CEA, LIST, Gif-sur-Yvette, F-91191 France","institution_ids":["https://openalex.org/I2738703131","https://openalex.org/I4210085861"]},{"raw_affiliation_string":"CNRS Univ. Paris-Sud","institution_ids":["https://openalex.org/I102197404","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"LIMSI, CNRS Univ. Paris-Sud","institution_ids":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I4210115485"]},{"raw_affiliation_string":"Universit\u00e9 Paris-Saclay","institution_ids":["https://openalex.org/I277688954"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5003763581"],"corresponding_institution_ids":["https://openalex.org/I102197404","https://openalex.org/I1294671590","https://openalex.org/I2738703131","https://openalex.org/I277688954","https://openalex.org/I4210085861","https://openalex.org/I4210115485"],"apc_list":null,"apc_paid":null,"fwci":3.0352,"has_fulltext":true,"cited_by_count":15,"citation_normalized_percentile":{"value":0.93087498,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7973445653915405},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7285870313644409},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6850900650024414},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5553137063980103},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.5349838137626648},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.5269278287887573},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5100022554397583},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45069485902786255},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4131110906600952},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3223947286605835}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7973445653915405},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7285870313644409},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6850900650024414},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5553137063980103},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.5349838137626648},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.5269278287887573},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5100022554397583},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45069485902786255},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4131110906600952},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3223947286605835},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/w17-4211","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-4211","pdf_url":"https://www.aclweb.org/anthology/W17-4211.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 EMNLP Workshop: Natural Language Processing\n          meets Journalism","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:cea-01857885v1","is_oa":true,"landing_page_url":"https://cea.hal.science/cea-01857885","pdf_url":"https://cea.hal.science/cea-01857885v1/file/W17-4211","source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2017 EMNLP Workshop: Natural Language Processing meets Journalism, 2017, Copenhagen, Denmark. pp.62-67, &#x27E8;10.18653/v1/W17-4211&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"doi:10.18653/v1/w17-4211","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-4211","pdf_url":"https://www.aclweb.org/anthology/W17-4211.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 EMNLP Workshop: Natural Language Processing\n          meets Journalism","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.75,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1275485275","display_name":"Template acquisition pour open event extraction","funder_award_id":"ANR-15-CE23-0018","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"}],"funders":[{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2757490538.pdf","grobid_xml":"https://content.openalex.org/works/W2757490538.grobid-xml"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W2097776316","https://openalex.org/W2107315187","https://openalex.org/W2118928552","https://openalex.org/W2121564430","https://openalex.org/W2165268584","https://openalex.org/W2169943035","https://openalex.org/W2185606683","https://openalex.org/W2250588112","https://openalex.org/W2250820040","https://openalex.org/W2251552857","https://openalex.org/W2475245295","https://openalex.org/W2514651853","https://openalex.org/W3154788810"],"related_works":["https://openalex.org/W3037187668","https://openalex.org/W2804364458","https://openalex.org/W4298130764","https://openalex.org/W2132641928","https://openalex.org/W2090259340","https://openalex.org/W4310225030","https://openalex.org/W2083665254","https://openalex.org/W2393816671","https://openalex.org/W1534720161","https://openalex.org/W2804957450"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"present":[4],"an":[5],"unsupervised":[6],"pipeline":[7],"approach":[8,56],"for":[9],"clustering":[10],"news":[11],"articles":[12,39],"based":[13],"on":[14,57],"identified":[15],"event":[16,49],"instances":[17],"in":[18,43],"their":[19],"content.":[20],"We":[21,53],"leverage":[22],"press":[23],"agency":[24],"newswire":[25],"and":[26,34],"monolingual":[27],"word":[28],"alignment":[29],"techniques":[30],"to":[31],"build":[32],"meaningful":[33],"linguistically":[35],"varied":[36],"clusters":[37],"of":[38,46,62],"from":[40],"the":[41,44],"Web":[42,63],"perspective":[45],"a":[47,58],"broader":[48],"type":[50],"detection":[51],"task.":[52],"validate":[54],"our":[55],"manually":[59],"annotated":[60],"corpus":[61],"articles.":[64]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
