{"id":"https://openalex.org/W2970666910","doi":"https://doi.org/10.18653/v1/d19-1369","title":"Single Training Dimension Selection for Word Embedding with PCA","display_name":"Single Training Dimension Selection for Word Embedding with PCA","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2970666910","doi":"https://doi.org/10.18653/v1/d19-1369","mag":"2970666910"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d19-1369","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d19-1369","pdf_url":"https://www.aclweb.org/anthology/D19-1369.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D19-1369.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100445279","display_name":"Yu Wang","orcid":"https://orcid.org/0000-0002-8059-8051"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yu Wang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5100445279"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2602,"has_fulltext":true,"cited_by_count":15,"citation_normalized_percentile":{"value":0.85244616,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3595","last_page":"3600"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.8470494747161865},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7892652750015259},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.7296245694160461},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.715410590171814},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.6532666683197021},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6378533244132996},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.572076678276062},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5515069961547852},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4563775956630707},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.4354618787765503},{"id":"https://openalex.org/keywords/sentiment-analysis","display_name":"Sentiment analysis","score":0.4322356581687927},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35753971338272095},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3255622386932373},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32279402017593384},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1331624984741211}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.8470494747161865},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7892652750015259},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.7296245694160461},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.715410590171814},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.6532666683197021},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6378533244132996},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.572076678276062},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5515069961547852},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4563775956630707},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.4354618787765503},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.4322356581687927},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35753971338272095},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3255622386932373},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32279402017593384},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1331624984741211},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/d19-1369","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d19-1369","pdf_url":"https://www.aclweb.org/anthology/D19-1369.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/d19-1369","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d19-1369","pdf_url":"https://www.aclweb.org/anthology/D19-1369.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.47999998927116394}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2970666910.pdf","grobid_xml":"https://content.openalex.org/works/W2970666910.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W658020064","https://openalex.org/W947140380","https://openalex.org/W1614298861","https://openalex.org/W1663973292","https://openalex.org/W1888005072","https://openalex.org/W2046253692","https://openalex.org/W2053921957","https://openalex.org/W2097732278","https://openalex.org/W2140610559","https://openalex.org/W2153579005","https://openalex.org/W2154851992","https://openalex.org/W2250539671","https://openalex.org/W2251012068","https://openalex.org/W2254973503","https://openalex.org/W2509386510","https://openalex.org/W2571859396","https://openalex.org/W2775025821","https://openalex.org/W2891375999","https://openalex.org/W2896457183","https://openalex.org/W2904759072","https://openalex.org/W2951672049","https://openalex.org/W2962790997","https://openalex.org/W2962936818","https://openalex.org/W2963221727","https://openalex.org/W2963341956","https://openalex.org/W2963494889","https://openalex.org/W2964073004","https://openalex.org/W2964179938","https://openalex.org/W3102296242","https://openalex.org/W3104097132","https://openalex.org/W4212863985","https://openalex.org/W4294170691","https://openalex.org/W4294218128","https://openalex.org/W4298422451","https://openalex.org/W4307613132"],"related_works":["https://openalex.org/W2896411932","https://openalex.org/W2335882425","https://openalex.org/W3192794374","https://openalex.org/W2993300079","https://openalex.org/W4362613237","https://openalex.org/W3031457336","https://openalex.org/W2745862583","https://openalex.org/W2949267551","https://openalex.org/W2944092870","https://openalex.org/W3102086967"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,23,37,62,83],"present":[4],"a":[5,28,51],"fast":[6],"and":[7,43,72,78,114],"reliable":[8],"method":[9,128],"based":[10],"on":[11,58],"PCA":[12,42],"to":[13,86],"select":[14,63],"the":[15,39,46,55,64,102,127],"number":[16,65],"of":[17,34,66,92],"dimensions":[18,48,67],"for":[19,105],"word":[20],"embeddings.":[21],"First,":[22],"train":[24,87],"one":[25,49],"embedding":[26,123],"with":[27],"generous":[29],"upper":[30],"bound":[31],"(e.g.":[32],"1,000)":[33],"dimensions.":[35],"Then":[36],"transform":[38],"embeddings":[40,93,104],"using":[41,75],"incrementally":[44],"remove":[45],"lesser":[47],"at":[50],"time":[52],"while":[53,68,94],"recording":[54],"embeddings'":[56],"performance":[57],"language":[59,79],"tasks.":[60],"Lastly,":[61],"balancing":[69],"model":[70],"size":[71],"accuracy.":[73],"Experiments":[74],"various":[76],"datasets":[77],"tasks":[80],"demonstrate":[81],"that":[82],"are":[84],"able":[85],"10":[88],"times":[89],"fewer":[90],"sets":[91],"retaining":[95],"optimal":[96],"performance.":[97],"Researchers":[98],"interested":[99,121],"in":[100,122],"training":[101],"best-performing":[103],"downstream":[106],"tasks,":[107],"such":[108],"as":[109,117,119],"sentiment":[110],"analysis,":[111],"question":[112],"answering":[113],"hypernym":[115],"extraction,":[116],"well":[118],"those":[120],"compression":[124],"should":[125],"find":[126],"helpful.":[129]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
