{"id":"https://openalex.org/W3008266615","doi":"https://doi.org/10.1109/bigdata47090.2019.9006160","title":"Search for K: Assessing Five Topic-Modeling Approaches to 120,000 Canadian Articles","display_name":"Search for K: Assessing Five Topic-Modeling Approaches to 120,000 Canadian Articles","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3008266615","doi":"https://doi.org/10.1109/bigdata47090.2019.9006160","mag":"3008266615"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata47090.2019.9006160","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9006160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037110618","display_name":"Qiang Fu","orcid":"https://orcid.org/0000-0002-7467-1355"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Qiang Fu","raw_affiliation_strings":["Department of Sociology, The University of British Columbia, Vancouver, BC, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Sociology, The University of British Columbia, Vancouver, BC, Canada","institution_ids":["https://openalex.org/I141945490"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052671723","display_name":"Yufan Zhuang","orcid":"https://orcid.org/0000-0003-4063-6460"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yufan Zhuang","raw_affiliation_strings":["Data Science Institute Columbia University, New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Data Science Institute Columbia University, New York, NY, USA","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014257393","display_name":"Jiaxin Gu","orcid":"https://orcid.org/0000-0003-3131-8014"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jiaxin Gu","raw_affiliation_strings":["Department of Sociology, The University of British Columbia, Vancouver, BC, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Sociology, The University of British Columbia, Vancouver, BC, Canada","institution_ids":["https://openalex.org/I141945490"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017061047","display_name":"Yushu Zhu","orcid":"https://orcid.org/0000-0002-3843-0941"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Yushu Zhu","raw_affiliation_strings":["Urban Studies Program and School of Public Policy, Simon Fraser University, Vancouver, BC, Canada"],"affiliations":[{"raw_affiliation_string":"Urban Studies Program and School of Public Policy, Simon Fraser University, Vancouver, BC, Canada","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009675741","display_name":"Huihui Qin","orcid":null},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Huihui Qin","raw_affiliation_strings":["Department of Applied Mathematics, The Hong Kong Polytechnic University, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Department of Applied Mathematics, The Hong Kong Polytechnic University, Hong Kong, China","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016055499","display_name":"Xin Guo","orcid":"https://orcid.org/0000-0002-7465-9356"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xin Guo","raw_affiliation_strings":["Department of Applied Mathematics, The Hong Kong Polytechnic University, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"Department of Applied Mathematics, The Hong Kong Polytechnic University, Hong Kong, China","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5037110618"],"corresponding_institution_ids":["https://openalex.org/I141945490"],"apc_list":null,"apc_paid":null,"fwci":8.2477,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.98001554,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3640","last_page":"3647"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5886390805244446},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4048185348510742},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3821234405040741}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5886390805244446},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4048185348510742},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3821234405040741}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata47090.2019.9006160","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9006160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:ira.lib.polyu.edu.hk:10397/88218","is_oa":false,"landing_page_url":"http://hdl.handle.net/10397/88218","pdf_url":null,"source":{"id":"https://openalex.org/S4306400205","display_name":"PolyU Institutional Research Archive (Hong Kong Polytechnic University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I14243506","host_organization_name":"Hong Kong Polytechnic University","host_organization_lineage":["https://openalex.org/I14243506"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7699999809265137,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1485732691","https://openalex.org/W1662191912","https://openalex.org/W1880262756","https://openalex.org/W1902027874","https://openalex.org/W1938475800","https://openalex.org/W1981552604","https://openalex.org/W2024932032","https://openalex.org/W2028080169","https://openalex.org/W2038043464","https://openalex.org/W2076818396","https://openalex.org/W2100163972","https://openalex.org/W2101746535","https://openalex.org/W2103878673","https://openalex.org/W2108316317","https://openalex.org/W2130339025","https://openalex.org/W2133286915","https://openalex.org/W2143017621","https://openalex.org/W2147152072","https://openalex.org/W2147946282","https://openalex.org/W2149684740","https://openalex.org/W2158997610","https://openalex.org/W2159426623","https://openalex.org/W2165599843","https://openalex.org/W2251582277","https://openalex.org/W2785834280","https://openalex.org/W2953320089","https://openalex.org/W3099514962","https://openalex.org/W3104887532","https://openalex.org/W4383745540","https://openalex.org/W6639619044","https://openalex.org/W6674922813","https://openalex.org/W6679482899","https://openalex.org/W6682044806","https://openalex.org/W6683333316","https://openalex.org/W6684489972","https://openalex.org/W6691363302","https://openalex.org/W6764970959"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"Topic":[0],"modeling":[1,23,56,72,179],"has":[2],"been":[3,76],"an":[4],"important":[5],"field":[6],"in":[7,54,177,189],"natural":[8],"language":[9],"processing":[10],"(NLP)":[11],"and":[12,57,82,114,118,131,166,180],"recently":[13],"witnessed":[14],"great":[15],"methodological":[16],"advances.":[17],"Yet,":[18],"the":[19,41,44,65,184],"development":[20,66],"of":[21,47,67,186],"topic":[22,55,71,178],"is":[24,73],"still,":[25],"if":[26],"not":[27],"increasingly,":[28],"challenged":[29],"by":[30,94],"two":[31],"critical":[32],"issues.":[33],"First,":[34],"despite":[35],"intense":[36],"efforts":[37],"toward":[38],"nonparametric/post-training":[39],"methods,":[40],"search":[42],"for":[43],"optimal":[45,153,187],"number":[46],"topics":[48,154,188],"K":[49],"remains":[50],"a":[51,79],"fundamental":[52],"question":[53],"warrants":[58],"input":[59],"from":[60,108,172],"domain":[61],"experts.":[62],"Second,":[63],"with":[64,127],"more":[68],"sophisticated":[69],"models,":[70],"now":[74],"ironically":[75],"treated":[77],"as":[78],"black":[80],"box":[81],"it":[83],"becomes":[84],"increasingly":[85],"difficult":[86],"to":[87,148],"tell":[88],"how":[89],"research":[90,174],"findings":[91,171],"are":[92,155],"informed":[93],"data,":[95],"model":[96,129],"specifications,":[97],"or":[98],"inference":[99,132],"algorithms.":[100],"Based":[101],"on":[102],"about":[103],"120,000":[104],"newspaper":[105],"articles":[106],"retrieved":[107],"three":[109,159],"major":[110],"Canadian":[111],"newspapers":[112],"(Globe":[113],"Mail,":[115],"Toronto":[116],"Star,":[117],"National":[119],"Post)":[120],"since":[121],"1977,":[122],"we":[123],"employ":[124],"five":[125],"methods":[126],"different":[128],"specifications":[130],"algorithms":[133],"(Latent":[134],"Semantic":[135],"Analysis,":[136,142,144],"Latent":[137],"Dirichlet":[138],"Allocation,":[139],"Principal":[140],"Component":[141],"Factor":[143],"Nonnegative":[145],"Matrix":[146],"Factorization)":[147],"identify":[149],"discussion":[150],"topics.":[151],"The":[152],"then":[156],"assessed":[157],"using":[158],"measures:":[160],"coherence":[161],"statistics,":[162],"held-out":[163],"likelihood":[164],"(loss),":[165],"graph-based":[167],"dimensionality":[168],"selection.":[169],"Mixed":[170],"this":[173],"complement":[175],"advances":[176],"provide":[181],"insights":[182],"into":[183],"choice":[185],"social":[190],"science":[191],"research.":[192]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
