{"id":"https://openalex.org/W2747907397","doi":"https://doi.org/10.1145/3107411.3108195","title":"Best Setting of Model Parameters in Applying Topic Modeling on Textual Documents.","display_name":"Best Setting of Model Parameters in Applying Topic Modeling on Textual Documents.","publication_year":2017,"publication_date":"2017-08-20","ids":{"openalex":"https://openalex.org/W2747907397","doi":"https://doi.org/10.1145/3107411.3108195","mag":"2747907397"},"language":"en","primary_location":{"id":"doi:10.1145/3107411.3108195","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3107411.3108195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th ACM International Conference on Bioinformatics, Computational Biology,and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008241848","display_name":"Wen Zou","orcid":"https://orcid.org/0000-0002-3735-1133"},"institutions":[{"id":"https://openalex.org/I1320320070","display_name":"United States Food and Drug Administration","ror":"https://ror.org/034xvzb47","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1320320070"]},{"id":"https://openalex.org/I1304557061","display_name":"National Center for Toxicological Research","ror":"https://ror.org/05jmhh281","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1304557061","https://openalex.org/I1320320070"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wen Zou","raw_affiliation_strings":["NCTR/USFDA, Jefferson, AR, USA"],"affiliations":[{"raw_affiliation_string":"NCTR/USFDA, Jefferson, AR, USA","institution_ids":["https://openalex.org/I1304557061","https://openalex.org/I1320320070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079405950","display_name":"Weizhong Zhao","orcid":"https://orcid.org/0000-0001-8552-6084"},"institutions":[{"id":"https://openalex.org/I1320320070","display_name":"United States Food and Drug Administration","ror":"https://ror.org/034xvzb47","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1320320070"]},{"id":"https://openalex.org/I1304557061","display_name":"National Center for Toxicological Research","ror":"https://ror.org/05jmhh281","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1304557061","https://openalex.org/I1320320070"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weizhong Zhao","raw_affiliation_strings":["NCTR/USFDA, Jefferson, AR, USA"],"affiliations":[{"raw_affiliation_string":"NCTR/USFDA, Jefferson, AR, USA","institution_ids":["https://openalex.org/I1304557061","https://openalex.org/I1320320070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051461953","display_name":"James J. Chen","orcid":"https://orcid.org/0000-0001-6967-6349"},"institutions":[{"id":"https://openalex.org/I1304557061","display_name":"National Center for Toxicological Research","ror":"https://ror.org/05jmhh281","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1304557061","https://openalex.org/I1320320070"]},{"id":"https://openalex.org/I1320320070","display_name":"United States Food and Drug Administration","ror":"https://ror.org/034xvzb47","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1320320070"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James J. Chen","raw_affiliation_strings":["NCTR/USFDA, Jefferson, AR, USA"],"affiliations":[{"raw_affiliation_string":"NCTR/USFDA, Jefferson, AR, USA","institution_ids":["https://openalex.org/I1304557061","https://openalex.org/I1320320070"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048655816","display_name":"Roger Perkins","orcid":null},"institutions":[{"id":"https://openalex.org/I1304557061","display_name":"National Center for Toxicological Research","ror":"https://ror.org/05jmhh281","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1304557061","https://openalex.org/I1320320070"]},{"id":"https://openalex.org/I1320320070","display_name":"United States Food and Drug Administration","ror":"https://ror.org/034xvzb47","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1320320070"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Roger Perkins","raw_affiliation_strings":["NCTR/USFDA, Jefferson, AR, USA"],"affiliations":[{"raw_affiliation_string":"NCTR/USFDA, Jefferson, AR, USA","institution_ids":["https://openalex.org/I1304557061","https://openalex.org/I1320320070"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5008241848"],"corresponding_institution_ids":["https://openalex.org/I1304557061","https://openalex.org/I1320320070"],"apc_list":null,"apc_paid":null,"fwci":0.195,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.61856142,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"588","last_page":"588"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.7713358998298645},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.765410840511322},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.762471079826355},{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.5782644748687744},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5427470803260803},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5314879417419434},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5135455131530762},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.5106044411659241},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.47343209385871887},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.46615278720855713},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.45631277561187744},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4485975205898285},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4406859874725342},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4181120991706848},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.2258438766002655}],"concepts":[{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.7713358998298645},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.765410840511322},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.762471079826355},{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.5782644748687744},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5427470803260803},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5314879417419434},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5135455131530762},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.5106044411659241},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.47343209385871887},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.46615278720855713},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.45631277561187744},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4485975205898285},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4406859874725342},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4181120991706848},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2258438766002655},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3107411.3108195","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3107411.3108195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th ACM International Conference on Bioinformatics, Computational Biology,and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W1880262756","https://openalex.org/W2162317738","https://openalex.org/W4231510805"],"related_works":["https://openalex.org/W2888805565","https://openalex.org/W2761847515","https://openalex.org/W4312773271","https://openalex.org/W4315588616","https://openalex.org/W2769501189","https://openalex.org/W2962686197","https://openalex.org/W4293734197","https://openalex.org/W2207653751","https://openalex.org/W4206967254","https://openalex.org/W2131689821"],"abstract_inverted_index":{"Probabilistic":[0],"topic":[1,41,56,176,184,239,307,325],"modeling":[2,240],"is":[3,51,121,266,274],"an":[4,17,230],"active":[5],"research":[6],"field":[7],"in":[8,81,271,289],"machine":[9],"learning":[10],"and":[11,72,74,77,131,152,158,181,205,242,260,264,314,318,333],"has":[12],"been":[13],"mainly":[14],"used":[15,55],"as":[16,114,123,331],"analytical":[18],"tool":[19],"to":[20,33,43,83,99,169,224,252,276,324],"structure":[21,34],"large":[22],"textual":[23,36,153],"corpora":[24],"for":[25,137,173],"data":[26,191,210],"mining.":[27,46],"It":[28],"offers":[29],"a":[30,60,96,115,124,188,209,281],"viable":[31],"approach":[32,98],"huge":[35],"document":[37],"collections":[38],"into":[39],"latent":[40],"themes":[42,201],"aid":[44],"text":[45,303],"Latent":[47],"Dirichlet":[48],"Allocation":[49],"(LDA)":[50],"the":[52,85,101,108,129,134,219,236,243],"most":[53,102,220],"commonly":[54],"modelling":[57],"method":[58,136],"across":[59],"wide":[61],"number":[62,104],"of":[63,88,105,110,117,119,133,142,183,212,238,293],"technical":[64],"fields.":[65],"However,":[66],"model":[67,89,245,272],"development":[68],"can":[69,286],"be":[70,253,287],"arduous":[71],"tedious,":[73],"requires":[75,229],"burdensome":[76],"systematic":[78],"sensitivity":[79,167,226],"studies":[80,168],"order":[82],"find":[84],"best":[86,171],"set":[87,192,211],"parameters.":[90],"In":[91],"this":[92],"study,":[93],"we":[94,164,186],"use":[95],"heuristic":[97],"estimate":[100],"appropriate":[103],"topics.":[106],"Specifically,":[107],"rate":[109],"perplexity":[111],"change":[112],"(RPC)":[113],"function":[116],"numbers":[118],"topics":[120],"proposed":[122,135],"suitable":[125],"selector.":[126],"We":[127,215],"test":[128,179],"stability":[130],"effectiveness":[132,180],"three":[138],"markedly":[139],"different":[140],"types":[141],"grounded-truth":[143],"datasets:":[144],"Salmonella":[145],"next":[146],"generation":[147],"sequencing,":[148],"pharmacological":[149],"side":[150],"effects,":[151],"abstracts":[154],"on":[155],"computational":[156],"biology":[157],"bioinformatics":[159],"(TCBB)":[160],"from":[161,193],"PubMed.":[162],"Then":[163],"describe":[165],"extensive":[166],"determine":[170],"practices":[172],"generating":[174],"effective":[175],"models.":[177],"To":[178],"validity":[182],"models,":[185],"constructed":[187],"ground":[189],"truth":[190],"PubMed":[194],"that":[195,217,268],"contained":[196],"some":[197],"40":[198],"health":[199],"related":[200],"including":[202],"negative":[203],"controls,":[204],"mixed":[206],"it":[207],"with":[208,255,291,296],"unstructured":[213],"documents.":[214],"found":[216],"obtaining":[218],"useful":[221],"model,":[222,284],"tuned":[223],"desired":[225],"versus":[227],"specificity,":[228],"iterative":[231],"process":[232],"wherein":[233],"preprocessing":[234],"steps,":[235],"type":[237],"algorithm,":[241],"algorithm's":[244],"parameters":[246],"are":[247,309,316,322,329,339],"systematically":[248],"varied.":[249],"Models":[250],"need":[251],"compared":[254],"both":[256],"qualitative,":[257],"subjective":[258],"assessments":[259],"quantitative,":[261],"objective":[262],"assessments,":[263],"care":[265],"required":[267],"Gibbs":[269],"sampling":[270],"estimation":[273],"sufficient":[275],"assure":[277],"stable":[278],"solutions.":[279],"With":[280],"high":[282],"quality":[283],"documents":[285,315],"rank-ordered":[288],"accordance":[290],"probability":[292],"being":[294],"associated":[295],"complex":[297],"regulatory":[298],"query":[299],"string,":[300],"greatly":[301],"lessoning":[302],"mining":[304],"work.":[305],"Importantly,":[306],"models":[308,326],"agnostic":[310],"about":[311],"how":[312],"words":[313],"defined,":[317],"thus":[319],"our":[320],"findings":[321],"extensible":[323],"where":[327],"samples":[328],"defined":[330],"documents,":[332],"genes,":[334],"proteins":[335],"or":[336],"their":[337],"sequences":[338],"words.":[340]},"counts_by_year":[{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
