{"id":"https://openalex.org/W2765260302","doi":"https://doi.org/10.1109/hpec.2017.8091076","title":"Efficient and accurate Word2Vec implementations in GPU and shared-memory multicore architectures","display_name":"Efficient and accurate Word2Vec implementations in GPU and shared-memory multicore architectures","publication_year":2017,"publication_date":"2017-09-01","ids":{"openalex":"https://openalex.org/W2765260302","doi":"https://doi.org/10.1109/hpec.2017.8091076","mag":"2765260302"},"language":"en","primary_location":{"id":"doi:10.1109/hpec.2017.8091076","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2017.8091076","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022353734","display_name":"Trevor M. Simonton","orcid":null},"institutions":[{"id":"https://openalex.org/I131651094","display_name":"University of Denver","ror":"https://ror.org/04w7skc03","country_code":"US","type":"education","lineage":["https://openalex.org/I131651094"]},{"id":"https://openalex.org/I921990950","display_name":"University of Colorado Denver","ror":"https://ror.org/02hh7en24","country_code":"US","type":"education","lineage":["https://openalex.org/I921990950"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Trevor M. Simonton","raw_affiliation_strings":["Computer Science and Engineering Department, University of Colorado Denver, Denver, CO"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering Department, University of Colorado Denver, Denver, CO","institution_ids":["https://openalex.org/I131651094","https://openalex.org/I921990950"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005161142","display_name":"Gita Alaghband","orcid":null},"institutions":[{"id":"https://openalex.org/I921990950","display_name":"University of Colorado Denver","ror":"https://ror.org/02hh7en24","country_code":"US","type":"education","lineage":["https://openalex.org/I921990950"]},{"id":"https://openalex.org/I131651094","display_name":"University of Denver","ror":"https://ror.org/04w7skc03","country_code":"US","type":"education","lineage":["https://openalex.org/I131651094"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gita Alaghband","raw_affiliation_strings":["Computer Science and Engineering Department, University of Colorado Denver, Denver, CO"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering Department, University of Colorado Denver, Denver, CO","institution_ids":["https://openalex.org/I131651094","https://openalex.org/I921990950"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5022353734"],"corresponding_institution_ids":["https://openalex.org/I131651094","https://openalex.org/I921990950"],"apc_list":null,"apc_paid":null,"fwci":1.1266,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.7971187,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"9948","issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8216451406478882},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7665597796440125},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.7367688417434692},{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.7082306742668152},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.6094919443130493},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.6066996455192566},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5537195205688477},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18263483047485352},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09467214345932007}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8216451406478882},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7665597796440125},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.7367688417434692},{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.7082306742668152},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.6094919443130493},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.6066996455192566},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5537195205688477},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18263483047485352},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09467214345932007},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec.2017.8091076","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2017.8091076","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.4099999964237213}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W36903255","https://openalex.org/W168564468","https://openalex.org/W1614298861","https://openalex.org/W2053921957","https://openalex.org/W2078483536","https://openalex.org/W2153579005","https://openalex.org/W2202715214","https://openalex.org/W2338087314","https://openalex.org/W2467655749","https://openalex.org/W2524917517","https://openalex.org/W3216404684","https://openalex.org/W6636510571"],"related_works":["https://openalex.org/W2980729574","https://openalex.org/W1560851690","https://openalex.org/W3092047717","https://openalex.org/W4390881630","https://openalex.org/W2770162183","https://openalex.org/W3110772647","https://openalex.org/W2025467172","https://openalex.org/W3023876411","https://openalex.org/W4205439893","https://openalex.org/W123152114"],"abstract_inverted_index":{"Word2Vec":[0,48],"is":[1],"a":[2,11,29,130,150],"popular":[3],"set":[4],"of":[5,19,31,46,98,133,152,155],"machine":[6,32],"learning":[7,33],"algorithms":[8],"that":[9,109],"use":[10],"neural":[12],"network":[13],"to":[14,25,42,78,83,86],"generate":[15],"dense":[16],"vector":[17],"representations":[18],"words.":[20],"These":[21],"vectors":[22,135],"have":[23],"proven":[24],"be":[26],"useful":[27],"in":[28],"variety":[30],"tasks.":[34],"In":[35],"this":[36,70],"work,":[37],"we":[38],"propose":[39,91],"new":[40,92],"methods":[41],"increase":[43,79],"the":[44,47,100,156],"speed":[45],"skip":[49,101,123,143],"gram":[50,102,124,144],"with":[51,66,125,145],"hierarchical":[52,103,146],"softmax":[53,104,147],"architecture":[54],"on":[55,62,71],"multi-core":[56,72],"shared":[57,87,111],"memory":[58,112],"CPU":[59],"systems,":[60],"and":[61,82,105,114,140],"modern":[63],"NVIDIA":[64,94],"GPUs":[65],"CUDA.":[67],"We":[68,89],"accomplish":[69],"CPUs":[73],"by":[74],"batching":[75],"training":[76],"operations":[77,117],"thread":[80],"locality":[81],"reduce":[84],"accesses":[85],"memory.":[88],"then":[90],"heterogeneous":[93],"GPU":[95,122,138],"CUDA":[96],"implementations":[97],"both":[99],"negative":[106,126],"sampling":[107,127],"techniques":[108],"utilize":[110],"registers":[113],"in-warp":[115],"shuffle":[116],"for":[118],"maximized":[119],"performance.":[120],"Our":[121],"approach":[128],"produces":[129],"higher":[131],"quality":[132],"word":[134],"than":[136],"previous":[137],"implementations,":[139],"our":[141],"flexible":[142],"implementation":[148],"achieves":[149],"factor":[151],"10":[153],"speedup":[154],"existing":[157],"methods.":[158]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2019,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
