{"id":"https://openalex.org/W1508823512","doi":"https://doi.org/10.1109/icc.2015.7248483","title":"Extracting unknown words from Sina Weibo via data clustering","display_name":"Extracting unknown words from Sina Weibo via data clustering","publication_year":2015,"publication_date":"2015-06-01","ids":{"openalex":"https://openalex.org/W1508823512","doi":"https://doi.org/10.1109/icc.2015.7248483","mag":"1508823512"},"language":"en","primary_location":{"id":"doi:10.1109/icc.2015.7248483","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icc.2015.7248483","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Communications (ICC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078920845","display_name":"Kai Lei","orcid":"https://orcid.org/0000-0001-9197-895X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kai Lei","raw_affiliation_strings":["Institute of Big Data Technologies Shenzhen Key Lab for Cloud Computing Technology & Applications, Peking University","Institute of Big Data Technologies, Shenzhen Key Lab for Cloud Computing Technology & Applications, School of Electronics and Computer Engineering(SECE), Peking University, China"],"affiliations":[{"raw_affiliation_string":"Institute of Big Data Technologies Shenzhen Key Lab for Cloud Computing Technology & Applications, Peking University","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Institute of Big Data Technologies, Shenzhen Key Lab for Cloud Computing Technology & Applications, School of Electronics and Computer Engineering(SECE), Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021955525","display_name":"Zhang Wei-yang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"WeiYang Zhang","raw_affiliation_strings":["Institute of Big Data Technologies Shenzhen Key Lab for Cloud Computing Technology & Applications, Peking University","Institute of Big Data Technologies, Shenzhen Key Lab for Cloud Computing Technology & Applications, School of Electronics and Computer Engineering(SECE), Peking University, China"],"affiliations":[{"raw_affiliation_string":"Institute of Big Data Technologies Shenzhen Key Lab for Cloud Computing Technology & Applications, Peking University","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Institute of Big Data Technologies, Shenzhen Key Lab for Cloud Computing Technology & Applications, School of Electronics and Computer Engineering(SECE), Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100324056","display_name":"Kai Zhang","orcid":"https://orcid.org/0000-0003-3850-5429"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Zhang","raw_affiliation_strings":["Institute of Big Data Technologies Shenzhen Key Lab for Cloud Computing Technology & Applications, Peking University","Institute of Big Data Technologies, Shenzhen Key Lab for Cloud Computing Technology & Applications, School of Electronics and Computer Engineering(SECE), Peking University, China"],"affiliations":[{"raw_affiliation_string":"Institute of Big Data Technologies Shenzhen Key Lab for Cloud Computing Technology & Applications, Peking University","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Institute of Big Data Technologies, Shenzhen Key Lab for Cloud Computing Technology & Applications, School of Electronics and Computer Engineering(SECE), Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049423626","display_name":"Kuai Xu","orcid":"https://orcid.org/0000-0001-6659-6773"},"institutions":[{"id":"https://openalex.org/I55732556","display_name":"Arizona State University","ror":"https://ror.org/03efmqc40","country_code":"US","type":"education","lineage":["https://openalex.org/I55732556"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kuai Xu","raw_affiliation_strings":["School of Mathematical and Natural Sciences, Arizona State University","School of Mathematical and Natural Sciences, Arizona State University, United States"],"affiliations":[{"raw_affiliation_string":"School of Mathematical and Natural Sciences, Arizona State University","institution_ids":["https://openalex.org/I55732556"]},{"raw_affiliation_string":"School of Mathematical and Natural Sciences, Arizona State University, United States","institution_ids":["https://openalex.org/I55732556"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5078920845"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":2.3838,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.90313398,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1182","last_page":"1187"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/microblogging","display_name":"Microblogging","score":0.8719847202301025},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.799013078212738},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.7766330242156982},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7670449614524841},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5855721831321716},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5419107675552368},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.5002868175506592},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.46808773279190063},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.4339020550251007},{"id":"https://openalex.org/keywords/keyword-extraction","display_name":"Keyword extraction","score":0.4297584295272827},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.42691507935523987},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.41542285680770874},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3764446973800659},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2314852774143219}],"concepts":[{"id":"https://openalex.org/C143275388","wikidata":"https://www.wikidata.org/wiki/Q92438","display_name":"Microblogging","level":3,"score":0.8719847202301025},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.799013078212738},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.7766330242156982},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7670449614524841},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5855721831321716},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5419107675552368},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.5002868175506592},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.46808773279190063},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.4339020550251007},{"id":"https://openalex.org/C2780288562","wikidata":"https://www.wikidata.org/wiki/Q25053353","display_name":"Keyword extraction","level":2,"score":0.4297584295272827},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.42691507935523987},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.41542285680770874},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3764446973800659},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2314852774143219},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icc.2015.7248483","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icc.2015.7248483","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Communications (ICC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7200000286102295,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320326446","display_name":"National Development and Reform Commission","ror":"https://ror.org/017a59b72"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W91511145","https://openalex.org/W182571877","https://openalex.org/W786936280","https://openalex.org/W1534625513","https://openalex.org/W1564105441","https://openalex.org/W1861196058","https://openalex.org/W1864482474","https://openalex.org/W1997384395","https://openalex.org/W2024531518","https://openalex.org/W2033755677","https://openalex.org/W2065688092","https://openalex.org/W2093424574","https://openalex.org/W2096301385","https://openalex.org/W2107946060","https://openalex.org/W2131417696","https://openalex.org/W2159193292","https://openalex.org/W2252264945","https://openalex.org/W2270993793","https://openalex.org/W2358307482","https://openalex.org/W6603802636","https://openalex.org/W6622716546","https://openalex.org/W6632081347","https://openalex.org/W6656873983","https://openalex.org/W6679705213","https://openalex.org/W6691657211"],"related_works":["https://openalex.org/W2275433313","https://openalex.org/W2053241453","https://openalex.org/W2017590198","https://openalex.org/W2358294942","https://openalex.org/W1497288441","https://openalex.org/W4367460280","https://openalex.org/W2062023296","https://openalex.org/W2351671768","https://openalex.org/W2581112652","https://openalex.org/W2052115016"],"abstract_inverted_index":{"Sina":[0],"Weibo,":[1],"a":[2,68,103],"Twitter-like":[3],"microblogging":[4],"site":[5],"attracting":[6],"over":[7],"240":[8],"million":[9],"monthly":[10],"active":[11],"users":[12,33,77,96],"to":[13,47,106],"tweet,":[14],"retweet,":[15],"and":[16,36,42,78,87,146,155],"comment,":[17],"has":[18],"rapidly":[19],"become":[20],"one":[21],"of":[22,75,94,124,136,148],"the":[23,92,95,99,122,134,137,144],"most":[24],"popular":[25],"social":[26],"media":[27],"sites":[28],"in":[29,56],"China.":[30],"As":[31],"many":[32],"create":[34],"new":[35],"innovative":[37],"words":[38,83,152],"on":[39,72,91],"their":[40],"tweets":[41,79,86,154],"comments,":[43],"it":[44],"is":[45],"necessary":[46],"extract":[48],"these":[49],"emerging":[50],"words,":[51],"which":[52],"do":[53],"not":[54],"exist":[55],"today's":[57],"Chinese":[58,151],"vocabulary":[59],"or":[60],"dictionary.":[61],"Towards":[62],"this":[63,65],"end,":[64],"paper":[66],"proposes":[67],"novel":[69],"method":[70,123],"based":[71],"data":[73,109,139],"clustering":[74,105,140],"Weibo":[76,85,108,126],"for":[80,142],"extracting":[81,149],"unknown":[82,150],"from":[84,153],"comments.":[88,156],"Specifically,":[89],"relying":[90],"similarity":[93],"who":[97],"post":[98],"tweets,":[100],"we":[101],"apply":[102],"hierarchical":[104],"divide":[107],"into":[110],"distinct":[111],"groups,":[112],"e.g.,":[113],"sports,":[114],"news":[115],"stories,":[116],"movies,":[117],"before":[118],"extraction.":[119],"Comparing":[120],"with":[121],"unclustered":[125],"data,":[127],"our":[128],"experimental":[129],"results":[130],"have":[131],"successfully":[132],"demonstrated":[133],"benefits":[135],"proposed":[138],"scheme":[141],"improving":[143],"recall":[145],"accuracy":[147]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-25T21:42:39.735039","created_date":"2025-10-10T00:00:00"}
