{"id":"https://openalex.org/W2981206054","doi":"https://doi.org/10.1515/comp-2019-0009","title":"Size Matters: The Impact of Training Size in Taxonomically-Enriched Word Embeddings","display_name":"Size Matters: The Impact of Training Size in Taxonomically-Enriched Word Embeddings","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2981206054","doi":"https://doi.org/10.1515/comp-2019-0009","mag":"2981206054"},"language":"en","primary_location":{"id":"doi:10.1515/comp-2019-0009","is_oa":true,"landing_page_url":"https://doi.org/10.1515/comp-2019-0009","pdf_url":"https://www.degruyter.com/document/doi/10.1515/comp-2019-0009/pdf","source":{"id":"https://openalex.org/S4210177004","display_name":"Open Computer Science","issn_l":"2299-1093","issn":["2299-1093"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310313990","host_organization_name":"De Gruyter","host_organization_lineage":["https://openalex.org/P4310313990"],"host_organization_lineage_names":["De Gruyter"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Open Computer Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.degruyter.com/document/doi/10.1515/comp-2019-0009/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034969780","display_name":"Alfredo Maldonado","orcid":"https://orcid.org/0000-0001-8426-5249"},"institutions":[{"id":"https://openalex.org/I205274468","display_name":"Trinity College Dublin","ror":"https://ror.org/02tyrky19","country_code":"IE","type":"education","lineage":["https://openalex.org/I205274468"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Alfredo Maldonado","raw_affiliation_strings":["ADAPT Centre at Trinity College Dublin , Dublin , Ireland"],"affiliations":[{"raw_affiliation_string":"ADAPT Centre at Trinity College Dublin , Dublin , Ireland","institution_ids":["https://openalex.org/I205274468"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057328421","display_name":"Filip Klubi\u010dka","orcid":"https://orcid.org/0000-0001-9712-6141"},"institutions":[{"id":"https://openalex.org/I4210144925","display_name":"Technological University Dublin","ror":"https://ror.org/04t0qbt32","country_code":"IE","type":"education","lineage":["https://openalex.org/I4210144925"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Filip Klubi\u010dka","raw_affiliation_strings":["ADAPT Centre at Technological University Dublin , Dublin , Ireland"],"affiliations":[{"raw_affiliation_string":"ADAPT Centre at Technological University Dublin , Dublin , Ireland","institution_ids":["https://openalex.org/I4210144925"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079991004","display_name":"John D. Kelleher","orcid":"https://orcid.org/0000-0001-6462-3248"},"institutions":[{"id":"https://openalex.org/I4210144925","display_name":"Technological University Dublin","ror":"https://ror.org/04t0qbt32","country_code":"IE","type":"education","lineage":["https://openalex.org/I4210144925"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"John Kelleher","raw_affiliation_strings":["ADAPT Centre at Technological University Dublin , Dublin , Ireland"],"affiliations":[{"raw_affiliation_string":"ADAPT Centre at Technological University Dublin , Dublin , Ireland","institution_ids":["https://openalex.org/I4210144925"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5034969780"],"corresponding_institution_ids":["https://openalex.org/I205274468"],"apc_list":{"value":1000,"currency":"EUR","value_usd":1078},"apc_paid":{"value":1000,"currency":"EUR","value_usd":1078},"fwci":0.4336,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.72539365,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"9","issue":"1","first_page":"252","last_page":"267"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/wordnet","display_name":"WordNet","score":0.9022852778434753},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6624720096588135},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6486895680427551},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5993055701255798},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5956057906150818},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.5407125949859619},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.5392218232154846},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5178340077400208},{"id":"https://openalex.org/keywords/random-walk","display_name":"Random walk","score":0.5159155130386353},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4393451511859894},{"id":"https://openalex.org/keywords/taxonomic-rank","display_name":"Taxonomic rank","score":0.43803197145462036},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16503280401229858},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.14835694432258606},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1284041404724121},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.11506524682044983},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.10361766815185547},{"id":"https://openalex.org/keywords/taxon","display_name":"Taxon","score":0.08914589881896973}],"concepts":[{"id":"https://openalex.org/C157659113","wikidata":"https://www.wikidata.org/wiki/Q533822","display_name":"WordNet","level":2,"score":0.9022852778434753},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6624720096588135},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6486895680427551},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5993055701255798},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5956057906150818},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.5407125949859619},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.5392218232154846},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5178340077400208},{"id":"https://openalex.org/C121194460","wikidata":"https://www.wikidata.org/wiki/Q856741","display_name":"Random walk","level":2,"score":0.5159155130386353},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4393451511859894},{"id":"https://openalex.org/C189592816","wikidata":"https://www.wikidata.org/wiki/Q427626","display_name":"Taxonomic rank","level":3,"score":0.43803197145462036},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16503280401229858},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.14835694432258606},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1284041404724121},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.11506524682044983},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10361766815185547},{"id":"https://openalex.org/C71640776","wikidata":"https://www.wikidata.org/wiki/Q16521","display_name":"Taxon","level":2,"score":0.08914589881896973},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1515/comp-2019-0009","is_oa":true,"landing_page_url":"https://doi.org/10.1515/comp-2019-0009","pdf_url":"https://www.degruyter.com/document/doi/10.1515/comp-2019-0009/pdf","source":{"id":"https://openalex.org/S4210177004","display_name":"Open Computer Science","issn_l":"2299-1093","issn":["2299-1093"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310313990","host_organization_name":"De Gruyter","host_organization_lineage":["https://openalex.org/P4310313990"],"host_organization_lineage_names":["De Gruyter"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Open Computer Science","raw_type":"journal-article"},{"id":"pmh:oai:arrow.tudublin.ie:scschcomart-1090","is_oa":true,"landing_page_url":"https://arrow.tudublin.ie/scschcomart/83","pdf_url":"https://arrow.tudublin.ie/scschcomart/83","source":{"id":"https://openalex.org/S4377196307","display_name":"Arrow - TU Dublin (Technological University Dublin)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210144925","host_organization_name":"Technological University Dublin","host_organization_lineage":["https://openalex.org/I4210144925"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Articles","raw_type":"article"},{"id":"pmh:oai:arrow.dit.ie:scschcomart-1090","is_oa":true,"landing_page_url":"https://arrow.dit.ie/scschcomart/83","pdf_url":null,"source":{"id":"https://openalex.org/S4306402084","display_name":"ARROW@Dublin Institute of Technology (Dublin Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I115570527","host_organization_name":"Dublin Institute of Technology","host_organization_lineage":["https://openalex.org/I115570527"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Articles","raw_type":"article"},{"id":"pmh:oai:doaj.org/article:9a8eb5b941bc4dce88079bf39a783089","is_oa":true,"landing_page_url":"https://doaj.org/article/9a8eb5b941bc4dce88079bf39a783089","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Open Computer Science, Vol 9, Iss 1, Pp 252-267 (2019)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1515/comp-2019-0009","is_oa":true,"landing_page_url":"https://doi.org/10.1515/comp-2019-0009","pdf_url":"https://www.degruyter.com/document/doi/10.1515/comp-2019-0009/pdf","source":{"id":"https://openalex.org/S4210177004","display_name":"Open Computer Science","issn_l":"2299-1093","issn":["2299-1093"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310313990","host_organization_name":"De Gruyter","host_organization_lineage":["https://openalex.org/P4310313990"],"host_organization_lineage_names":["De Gruyter"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Open Computer Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G228147372","display_name":null,"funder_award_id":"Grant 13/RC/2106","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G2563910032","display_name":null,"funder_award_id":"13/RC/2106","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G3993907298","display_name":null,"funder_award_id":"13/RC/2106","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"}],"funders":[{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"},{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2981206054.pdf","grobid_xml":"https://content.openalex.org/works/W2981206054.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W1503259811","https://openalex.org/W1662133657","https://openalex.org/W1814992895","https://openalex.org/W1854884267","https://openalex.org/W2008434289","https://openalex.org/W2130199334","https://openalex.org/W2251803266","https://openalex.org/W2294429012","https://openalex.org/W2560366237","https://openalex.org/W2562366217","https://openalex.org/W2592493346","https://openalex.org/W2605630617","https://openalex.org/W2620558438","https://openalex.org/W2753628379","https://openalex.org/W2890249124","https://openalex.org/W2962684341","https://openalex.org/W2963639153","https://openalex.org/W2964270525","https://openalex.org/W3098865490","https://openalex.org/W3105096579","https://openalex.org/W4213168938"],"related_works":["https://openalex.org/W2043952800","https://openalex.org/W2047143235","https://openalex.org/W2121846020","https://openalex.org/W1572864191","https://openalex.org/W2957377172","https://openalex.org/W2165693052","https://openalex.org/W2164877079","https://openalex.org/W2113471940","https://openalex.org/W2569513598","https://openalex.org/W2907883452"],"abstract_inverted_index":{"Abstract":[0],"Word":[1],"embeddings":[2,60,72,99,116,119,176,189],"trained":[3,63,122,177,190],"on":[4,21,38,64,123,178,191,202],"natural":[5,146,180,193,223],"corpora":[6,181,194],"(e.g.,":[7],"newspaper":[8],"collections,":[9],"Wikipedia":[10],"or":[11,29,49],"the":[12,142,145,150,154,162,183,219,222,226,229,241,258,272],"Web)":[13],"excel":[14],"in":[15,73,79,88,161,168,217,254,264,271],"capturing":[16],"thematic":[17,80],"similarity":[18,75,92,170,204],"(\u201ctopical":[19],"relatedness\u201d)":[20],"word":[22],"pairs":[23,39],"such":[24],"as":[25],"\u2018coffee\u2019":[26],"and":[27,31,45,51,138,148,225,246],"\u2018cup\u2019":[28,44],"\u2019bus\u2019":[30],"\u2018road\u2019.":[32],"However,":[33],"they":[34],"are":[35],"less":[36],"successful":[37],"showing":[40],"taxonomic":[41,74,101,108,118,186,203,269],"similarity,":[42],"like":[43,105],"\u2018mug\u2019":[46],"(near":[47],"synonyms)":[48],"\u2018bus\u2019":[50],"\u2018train\u2019":[52],"(types":[53],"of":[54,67,91,126,135,144,149,153,164,208,221,228,260],"public":[55],"transport).":[56],"Moreover,":[57],"purely":[58],"taxonomy-based":[59],"(e.g.":[61,120],"those":[62,121],"a":[65,124,132,158,255],"random-walk":[66,125,151,230],"WordNet\u2019s":[68,127],"structure)":[69],"outperform":[70],"natural-corpus":[71,98,115],"but":[76],"underperform":[77],"them":[78],"similarity.":[81],"Previous":[82],"work":[83],"suggests":[84],"that":[85,140,175,211],"performance":[86,163],"gains":[87],"both":[89,141,169],"types":[90],"can":[93,110],"be":[94,111,215],"achieved":[95],"by":[96,113],"enriching":[97],"with":[100,117],"information":[102],"from":[103,185,197],"taxonomies":[104],"Word-Net.":[106],"This":[107,129],"enrichment":[109,187,199],"done":[112],"combining":[114],"structure).":[128],"paper":[130],"conducts":[131],"deep":[133],"analysis":[134],"this":[136,198,209],"assumption":[137],"shows":[139],"size":[143,220,227],"corpus":[147,224],"coverage":[152],"WordNet":[155,242,262],"structure":[156,243],"play":[157],"crucial":[159],"role":[160],"combined":[165],"(enriched)":[166],"vectors":[167],"tasks.":[171,205],"Specifically,":[172],"we":[173,237],"show":[174],"medium-sized":[179],"benefit":[182,196],"most":[184],"whilst":[188,240],"large":[192],"only":[195],"when":[200],"evaluated":[201],"The":[206],"implication":[207],"is":[210,244,248],"care":[212],"has":[213],"to":[214,232,250],"taken":[216],"controlling":[218],"used":[231],"train":[233],"vectors.":[234],"In":[235],"addition,":[236],"find":[238],"that,":[239],"finite":[245],"it":[247,253],"possible":[249],"fully":[251],"traverse":[252],"single":[256],"pass,":[257],"repetition":[259],"well-connected":[261],"concepts":[263],"extended":[265],"random-walks":[266],"effectively":[267],"reinforces":[268],"relations":[270],"learned":[273],"embeddings.":[274]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2019-10-25T00:00:00"}
