{"id":"https://openalex.org/W2991129546","doi":"https://doi.org/10.26615/978-954-452-056-4_150","title":"Bigger versus Similar: Selecting a Background Corpus for First Story Detection Based on Distributional Similarity","display_name":"Bigger versus Similar: Selecting a Background Corpus for First Story Detection Based on Distributional Similarity","publication_year":2019,"publication_date":"2019-10-22","ids":{"openalex":"https://openalex.org/W2991129546","doi":"https://doi.org/10.26615/978-954-452-056-4_150","mag":"2991129546"},"language":"en","primary_location":{"id":"doi:10.26615/978-954-452-056-4_150","is_oa":true,"landing_page_url":"http://doi.org/10.26615/978-954-452-056-4_150","pdf_url":"https://doi.org/10.26615/978-954-452-056-4_150","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings - Natural Language Processing in a Deep Learning World","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.26615/978-954-452-056-4_150","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100455805","display_name":"Fei Wang","orcid":"https://orcid.org/0000-0002-2212-3947"},"institutions":[{"id":"https://openalex.org/I4210144925","display_name":"Technological University Dublin","ror":"https://ror.org/04t0qbt32","country_code":"IE","type":"education","lineage":["https://openalex.org/I4210144925"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Fei Wang","raw_affiliation_strings":["Technological University Dublin ADAPT Research Centre"],"affiliations":[{"raw_affiliation_string":"Technological University Dublin ADAPT Research Centre","institution_ids":["https://openalex.org/I4210144925"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009346892","display_name":"Robert Ross","orcid":"https://orcid.org/0000-0001-7088-273X"},"institutions":[{"id":"https://openalex.org/I4210144925","display_name":"Technological University Dublin","ror":"https://ror.org/04t0qbt32","country_code":"IE","type":"education","lineage":["https://openalex.org/I4210144925"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Robert J. Ross","raw_affiliation_strings":["Technological University Dublin ADAPT Research Centre"],"affiliations":[{"raw_affiliation_string":"Technological University Dublin ADAPT Research Centre","institution_ids":["https://openalex.org/I4210144925"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079991004","display_name":"John D. Kelleher","orcid":"https://orcid.org/0000-0001-6462-3248"},"institutions":[{"id":"https://openalex.org/I4210144925","display_name":"Technological University Dublin","ror":"https://ror.org/04t0qbt32","country_code":"IE","type":"education","lineage":["https://openalex.org/I4210144925"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"John D. Kelleher","raw_affiliation_strings":["Technological University Dublin ADAPT Research Centre"],"affiliations":[{"raw_affiliation_string":"Technological University Dublin ADAPT Research Centre","institution_ids":["https://openalex.org/I4210144925"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5079991004"],"corresponding_institution_ids":["https://openalex.org/I4210144925"],"apc_list":null,"apc_paid":null,"fwci":0.1447,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.59998301,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1312","last_page":"1320"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7339430451393127},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6573313474655151},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.6349478960037231},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6074742078781128},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5843865275382996},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5727136731147766},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.5708694458007812},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5533855557441711},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5099197626113892},{"id":"https://openalex.org/keywords/text-corpus","display_name":"Text corpus","score":0.4775881767272949},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.47516578435897827},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4222426414489746},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16494476795196533},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11003854870796204}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7339430451393127},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6573313474655151},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.6349478960037231},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6074742078781128},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5843865275382996},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5727136731147766},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.5708694458007812},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5533855557441711},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5099197626113892},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.4775881767272949},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.47516578435897827},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4222426414489746},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16494476795196533},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11003854870796204},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.26615/978-954-452-056-4_150","is_oa":true,"landing_page_url":"http://doi.org/10.26615/978-954-452-056-4_150","pdf_url":"https://doi.org/10.26615/978-954-452-056-4_150","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings - Natural Language Processing in a Deep Learning World","raw_type":"proceedings-article"},{"id":"pmh:oai:arrow.tudublin.ie:scschcomart-1100","is_oa":true,"landing_page_url":"https://arrow.tudublin.ie/scschcomart/91","pdf_url":"https://arrow.tudublin.ie/scschcomart/91","source":{"id":"https://openalex.org/S4377196307","display_name":"Arrow - TU Dublin (Technological University Dublin)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210144925","host_organization_name":"Technological University Dublin","host_organization_lineage":["https://openalex.org/I4210144925"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Articles","raw_type":"article"},{"id":"pmh:oai:arrow.tudublin.ie:scschcomcon-1285","is_oa":true,"landing_page_url":"https://arrow.tudublin.ie/scschcomcon/277","pdf_url":"https://arrow.tudublin.ie/scschcomcon/277","source":{"id":"https://openalex.org/S4377196307","display_name":"Arrow - TU Dublin (Technological University Dublin)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210144925","host_organization_name":"Technological University Dublin","host_organization_lineage":["https://openalex.org/I4210144925"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Conference papers","raw_type":"conferencepaper"},{"id":"pmh:oai:arrow.tudublin.ie:scschcomart-1101","is_oa":false,"landing_page_url":"https://arrow.tudublin.ie/cgi/viewcontent.cgi?article=1101&context=scschcomart","pdf_url":null,"source":{"id":"https://openalex.org/S4377196307","display_name":"Arrow - TU Dublin (Technological University Dublin)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210144925","host_organization_name":"Technological University Dublin","host_organization_lineage":["https://openalex.org/I4210144925"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Articles","raw_type":"article"}],"best_oa_location":{"id":"doi:10.26615/978-954-452-056-4_150","is_oa":true,"landing_page_url":"http://doi.org/10.26615/978-954-452-056-4_150","pdf_url":"https://doi.org/10.26615/978-954-452-056-4_150","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings - Natural Language Processing in a Deep Learning World","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7599999904632568,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G228147372","display_name":null,"funder_award_id":"Grant 13/RC/2106","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G2546278896","display_name":null,"funder_award_id":"13/RC/2106","funder_id":"https://openalex.org/F4320336201","funder_display_name":"ADAPT - Centre for Digital Content Technology"},{"id":"https://openalex.org/G3993907298","display_name":null,"funder_award_id":"13/RC/2106","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"}],"funders":[{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"},{"id":"https://openalex.org/F4320336201","display_name":"ADAPT - Centre for Digital Content Technology","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2991129546.pdf","grobid_xml":"https://content.openalex.org/works/W2991129546.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W1586405805","https://openalex.org/W1998224037","https://openalex.org/W2007760849","https://openalex.org/W2086661708","https://openalex.org/W2086925418","https://openalex.org/W2105846817","https://openalex.org/W2132870739","https://openalex.org/W2134273450","https://openalex.org/W2135909747","https://openalex.org/W2140427797","https://openalex.org/W2250752175","https://openalex.org/W2378208052","https://openalex.org/W2462693065","https://openalex.org/W2606089314","https://openalex.org/W2614548997","https://openalex.org/W2724951827","https://openalex.org/W2789847814","https://openalex.org/W2899642745","https://openalex.org/W4210984920","https://openalex.org/W4287901611","https://openalex.org/W4300694029"],"related_works":["https://openalex.org/W2349784553","https://openalex.org/W3022596247","https://openalex.org/W2601444686","https://openalex.org/W42295635","https://openalex.org/W4307058054","https://openalex.org/W1973996291","https://openalex.org/W4292238148","https://openalex.org/W4323660495","https://openalex.org/W2330575325","https://openalex.org/W2385319785"],"abstract_inverted_index":{"The":[0],"current":[1],"state":[2],"of":[3,68,91,100,115,122,167,174],"the":[4,29,36,46,63,69,88,98,120,126,154,172],"art":[5],"for":[6,103,108,197],"First":[7],"Story":[8],"Detection":[9],"(FSD)":[10],"are":[11],"nearest":[12],"neighbourbased":[13],"models":[14,26,147],"with":[15],"traditional":[16],"term":[17,39,58,161],"vector":[18],"representations;":[19],"however,":[20],"one":[21],"challenge":[22],"faced":[23],"by":[24,35],"FSD":[25,155,169],"is":[27,32,94,164],"that":[28,160,181],"document":[30],"representation":[31],"usually":[33],"defined":[34],"vocabulary":[37],"and":[38,60,125],"frequency":[40],"from":[41],"a":[42,106,113,141,182,192],"background":[43,48,137,151],"corpus.":[44,143],"Consequently,":[45],"ideal":[47],"corpus":[49,186,196],"should":[50],"arguably":[51],"be":[52,79,188],"both":[53],"large-scale":[54],"to":[55,62,117,140,153],"ensure":[56],"adequate":[57],"coverage,":[59],"similar":[61],"target":[64,142],"domain":[65],"in":[66,82],"terms":[67,93,102,124],"language":[70],"distribution.":[71],"However,":[72],"given":[73],"these":[74,132],"two":[75],"factors":[76],"cannot":[77],"always":[78],"mutually":[80],"satisfied,":[81],"this":[83],"paper":[84],"we":[85,111,134,179],"examine":[86],"whether":[87],"distributional":[89,127,162],"similarity":[90,128,163],"common":[92,101,123,175],"more":[95,165,189],"important":[96],"than":[97,171,191],"scale":[99,121,173],"FSD.":[104,198],"As":[105],"basis":[107],"our":[109],"analysis":[110],"propose":[112],"set":[114],"metrics":[116,133],"quantitatively":[118],"measure":[119],"between":[129],"corpora.":[130],"Using":[131],"rank":[135],"different":[136,150],"corpora":[138,152],"relative":[139],"We":[144],"also":[145],"apply":[146],"based":[148],"on":[149],"task.":[156],"Our":[157],"results":[158],"show":[159],"predictive":[166],"good":[168],"performance":[170],"terms;":[176],"and,":[177],"thus":[178],"demonstrate":[180],"smaller":[183],"recent":[184],"domain-related":[185],"will":[187],"suitable":[190],"very":[193],"largescale":[194],"general":[195]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
