{"id":"https://openalex.org/W2202042777","doi":"https://doi.org/10.1109/bigdata.2015.7364114","title":"Using Word2Vec to process big text data","display_name":"Using Word2Vec to process big text data","publication_year":2015,"publication_date":"2015-10-01","ids":{"openalex":"https://openalex.org/W2202042777","doi":"https://doi.org/10.1109/bigdata.2015.7364114","mag":"2202042777"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2015.7364114","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7364114","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013219972","display_name":"Long Ma","orcid":"https://orcid.org/0000-0002-5011-2161"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Long Ma","raw_affiliation_strings":["Computer Science Department, Georgia State University, Atlanta, Georgia"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Georgia State University, Atlanta, Georgia","institution_ids":["https://openalex.org/I181565077"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100612105","display_name":"Yanqing Zhang","orcid":"https://orcid.org/0000-0003-2349-1925"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanqing Zhang","raw_affiliation_strings":["Computer Science Department, Georgia State University, Atlanta, Georgia"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Georgia State University, Atlanta, Georgia","institution_ids":["https://openalex.org/I181565077"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5013219972"],"corresponding_institution_ids":["https://openalex.org/I181565077"],"apc_list":null,"apc_paid":null,"fwci":6.6767,"has_fulltext":false,"cited_by_count":223,"citation_normalized_percentile":{"value":0.96763919,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2895","last_page":"2897"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.9629632830619812},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7782340049743652},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.7305809259414673},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.6063796281814575},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5540074706077576},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5465561747550964},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.515629231929779},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5049031376838684},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5029856562614441},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.48858174681663513},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.46172064542770386},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4103773236274719},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1086302101612091},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09093233942985535}],"concepts":[{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.9629632830619812},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7782340049743652},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.7305809259414673},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.6063796281814575},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5540074706077576},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5465561747550964},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.515629231929779},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5049031376838684},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5029856562614441},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.48858174681663513},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.46172064542770386},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4103773236274719},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1086302101612091},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09093233942985535},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2015.7364114","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2015.7364114","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.41999998688697815}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1487615702","https://openalex.org/W1614298861","https://openalex.org/W1634005169","https://openalex.org/W1977556410","https://openalex.org/W2089468765","https://openalex.org/W2101234009","https://openalex.org/W2118585731","https://openalex.org/W2122056984","https://openalex.org/W2128476487","https://openalex.org/W2141599568","https://openalex.org/W2153579005","https://openalex.org/W2166445532","https://openalex.org/W4294170691","https://openalex.org/W6636510571","https://openalex.org/W6675354045","https://openalex.org/W6677656871","https://openalex.org/W6680890276","https://openalex.org/W6682691769"],"related_works":["https://openalex.org/W2980729574","https://openalex.org/W1560851690","https://openalex.org/W3092047717","https://openalex.org/W4390881630","https://openalex.org/W2905749112","https://openalex.org/W2346530426","https://openalex.org/W3099354896","https://openalex.org/W4287599800","https://openalex.org/W4312264180","https://openalex.org/W3046869600"],"abstract_inverted_index":{"Big":[0],"data":[1,5,17,32,37,48,61,82,131,163,190,195],"is":[2,19,92,109,197],"a":[3,20,68,146,188],"broad":[4],"set":[6,18],"that":[7,141,193],"has":[8],"been":[9],"used":[10],"in":[11,72],"many":[12,47],"fields.":[13],"To":[14],"process":[15],"huge":[16],"time":[21],"consuming":[22],"work,":[23,158],"not":[24,110],"only":[25],"due":[26],"to":[27,57,185],"its":[28],"big":[29,60],"volume":[30],"of":[31,64,74,76,94,117,123,134,149],"size,":[33],"but":[34,114],"also":[35],"because":[36],"type":[38],"and":[39,44,50,105,126,167,180],"structure":[40],"can":[41,66,142],"be":[42,86,143],"different":[43],"complex.":[45],"Currently,":[46],"mining":[49],"machine":[51],"learning":[52,70,90,119,135],"technique":[53],"are":[54],"being":[55],"applied":[56],"deal":[58],"with":[59],"problem;":[62],"some":[63],"them":[65],"construct":[67],"good":[69],"algorithm":[71,91],"terms":[73],"lots":[75],"training":[77,161],"example.":[78],"However,":[79],"considering":[80],"the":[81,100,153,162,169,176,182,194],"dimension,":[83],"it":[84,115],"will":[85],"more":[87],"efficient":[88],"if":[89],"capable":[93],"selecting":[95],"useful":[96],"features":[97],"or":[98,151],"decreasing":[99],"feature":[101],"dimension.":[102],"Word2Vec,":[103],"proposed":[104],"supported":[106],"by":[107],"Google,":[108],"an":[111],"individual":[112],"algorithm,":[113],"consists":[116],"two":[118],"models,":[120,136],"Continuous":[121],"Bag":[122],"Words":[124],"(CBOW)":[125],"Skip-gram.":[127],"By":[128],"feeding":[129],"text":[130,150],"into":[132,187],"one":[133],"Word2Vec":[137,165],"outputs":[138],"word":[139,170],"vectors":[140],"represented":[144],"as":[145],"large":[147],"piece":[148],"even":[152],"entire":[154],"article.":[155],"In":[156,172],"our":[157],"we":[159,174],"first":[160],"via":[164],"model":[166],"evaluated":[168],"similarity.":[171],"addition,":[173],"clustering":[175],"similar":[177],"words":[178],"together":[179],"use":[181],"generated":[183],"clusters":[184],"fit":[186],"new":[189],"dimension":[191,196],"so":[192],"decreased.":[198]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":27},{"year":2024,"cited_by_count":28},{"year":2023,"cited_by_count":40},{"year":2022,"cited_by_count":43},{"year":2021,"cited_by_count":33},{"year":2020,"cited_by_count":14},{"year":2019,"cited_by_count":16},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":2}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
