{"id":"https://openalex.org/W2888241476","doi":"https://doi.org/10.1145/3239235.3267435","title":"Measuring LDA topic stability from clusters of replicated runs","display_name":"Measuring LDA topic stability from clusters of replicated runs","publication_year":2018,"publication_date":"2018-10-04","ids":{"openalex":"https://openalex.org/W2888241476","doi":"https://doi.org/10.1145/3239235.3267435","mag":"2888241476"},"language":"en","primary_location":{"id":"doi:10.1145/3239235.3267435","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3239235.3267435","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM/IEEE International Symposium on Empirical Software Engineering and Measurement","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1808.08098","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078824435","display_name":"Mika M\u00e4ntyl\u00e4","orcid":"https://orcid.org/0000-0002-2841-5879"},"institutions":[{"id":"https://openalex.org/I98381234","display_name":"University of Oulu","ror":"https://ror.org/03yj89h83","country_code":"FI","type":"education","lineage":["https://openalex.org/I98381234"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Mika V. Mantyla","raw_affiliation_strings":["University of Oulu, Finland"],"affiliations":[{"raw_affiliation_string":"University of Oulu, Finland","institution_ids":["https://openalex.org/I98381234"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067646491","display_name":"Ma\u00eblick Claes","orcid":"https://orcid.org/0000-0003-2259-3946"},"institutions":[{"id":"https://openalex.org/I98381234","display_name":"University of Oulu","ror":"https://ror.org/03yj89h83","country_code":"FI","type":"education","lineage":["https://openalex.org/I98381234"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Maelick Claes","raw_affiliation_strings":["University of Oulu, Finland"],"affiliations":[{"raw_affiliation_string":"University of Oulu, Finland","institution_ids":["https://openalex.org/I98381234"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048619460","display_name":"Umar Farooq","orcid":"https://orcid.org/0000-0001-7229-9847"},"institutions":[{"id":"https://openalex.org/I98381234","display_name":"University of Oulu","ror":"https://ror.org/03yj89h83","country_code":"FI","type":"education","lineage":["https://openalex.org/I98381234"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Umar Farooq","raw_affiliation_strings":["University of Oulu, Finland"],"affiliations":[{"raw_affiliation_string":"University of Oulu, Finland","institution_ids":["https://openalex.org/I98381234"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5078824435"],"corresponding_institution_ids":["https://openalex.org/I98381234"],"apc_list":null,"apc_paid":null,"fwci":4.0614,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":{"value":0.95040812,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.9030767679214478},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.7726491093635559},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6948024034500122},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.6597822308540344},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6369842290878296},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5924282670021057},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.5009078979492188},{"id":"https://openalex.org/keywords/commit","display_name":"Commit","score":0.45530715584754944},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.44973742961883545},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36295753717422485},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3501804769039154},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33279579877853394},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.14749601483345032}],"concepts":[{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.9030767679214478},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.7726491093635559},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6948024034500122},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.6597822308540344},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6369842290878296},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5924282670021057},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.5009078979492188},{"id":"https://openalex.org/C153180980","wikidata":"https://www.wikidata.org/wiki/Q19776675","display_name":"Commit","level":2,"score":0.45530715584754944},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.44973742961883545},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36295753717422485},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3501804769039154},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33279579877853394},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.14749601483345032},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3239235.3267435","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3239235.3267435","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM/IEEE International Symposium on Empirical Software Engineering and Measurement","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1808.08098","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1808.08098","pdf_url":"https://arxiv.org/pdf/1808.08098","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:oulu.fi:nbnfi-fe201901031265","is_oa":false,"landing_page_url":"http://urn.fi/urn:nbn:fi-fe201901031265","pdf_url":null,"source":{"id":"https://openalex.org/S4306400284","display_name":"University of Oulu Repository (University of Oulu)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98381234","host_organization_name":"University of Oulu","host_organization_lineage":["https://openalex.org/I98381234"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1808.08098","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1808.08098","pdf_url":"https://arxiv.org/pdf/1808.08098","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7786573680","display_name":null,"funder_award_id":"298020","funder_id":"https://openalex.org/F4320321108","funder_display_name":"Academy of Finland"}],"funders":[{"id":"https://openalex.org/F4320321108","display_name":"Academy of Finland","ror":"https://ror.org/05k73zm37"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W153866397","https://openalex.org/W1539495021","https://openalex.org/W1880262756","https://openalex.org/W1939733313","https://openalex.org/W1972886276","https://openalex.org/W1979018646","https://openalex.org/W1979655048","https://openalex.org/W1983719983","https://openalex.org/W2001082470","https://openalex.org/W2014116953","https://openalex.org/W2021581601","https://openalex.org/W2038043464","https://openalex.org/W2094808586","https://openalex.org/W2098126593","https://openalex.org/W2104607014","https://openalex.org/W2120379285","https://openalex.org/W2140264852","https://openalex.org/W2250539671","https://openalex.org/W2290803316","https://openalex.org/W2304411646","https://openalex.org/W2397731355","https://openalex.org/W2461407631","https://openalex.org/W2475828198","https://openalex.org/W2507861678","https://openalex.org/W2587676558","https://openalex.org/W2604873636","https://openalex.org/W2963491860","https://openalex.org/W3105496546","https://openalex.org/W4231510805","https://openalex.org/W4250687526"],"related_works":["https://openalex.org/W2888805565","https://openalex.org/W4312773271","https://openalex.org/W4315588616","https://openalex.org/W2769501189","https://openalex.org/W2962686197","https://openalex.org/W2207653751","https://openalex.org/W4293863151","https://openalex.org/W3159709618","https://openalex.org/W2611137333","https://openalex.org/W3005513013"],"abstract_inverted_index":{"Background:":[0],"Unstructured":[1],"and":[2,8,48,62,93,150,200],"textual":[3],"data":[4,18],"is":[5,15,140,201],"increasing":[6],"rapidly":[7],"Latent":[9],"Dirichlet":[10],"Allocation":[11],"(LDA)":[12],"topic":[13,156],"modeling":[14],"a":[16,39,50],"popular":[17],"analysis":[19],"methods":[20],"for":[21,53,142],"it.":[22],"Past":[23],"work":[24],"suggests":[25],"that":[26,41,211],"instability":[27],"of":[28,116,125,164,178],"LDA":[29,45,60,91,99,197,214],"topics":[30,61,80,92,100,127],"may":[31,189],"lead":[32,190],"to":[33,76,81,161,174,191,207],"systematic":[34],"errors.":[35],"Aim:":[36],"We":[37,57,132,152],"propose":[38],"method":[40,139],"relies":[42],"on":[43,213],"replicated":[44],"runs,":[46],"clustering,":[47],"providing":[49],"stability":[51,113,124,157,198],"metric":[52],"the":[54,78,89,102,108,123,126,129,162,165],"topics.":[55,71,166],"Method:":[56],"generate":[58],"k":[59,82,85],"replicate":[63],"this":[64],"process":[65],"n":[66],"times":[67],"resulting":[68],"in":[69,169,180],"n*k":[70,79],"Then":[72],"we":[73,94,110,118],"use":[74],"K-medioids":[75],"cluster":[77],"clusters.":[83,130],"The":[84],"clusters":[86],"now":[87],"represent":[88],"original":[90],"present":[95],"them":[96],"like":[97],"normal":[98],"showing":[101,122],"ten":[103],"most":[104],"probable":[105],"words.":[106],"For":[107],"clusters,":[109],"try":[111],"multiple":[112],"metrics,":[114],"out":[115],"which":[117],"recommend":[119],"Rank-Biased":[120],"Overlap,":[121],"inside":[128],"Results:":[131],"provide":[133],"an":[134],"initial":[135],"validation":[136],"where":[137],"our":[138,155],"used":[141],"270,000":[143],"Mozilla":[144],"Firefox":[145],"commit":[146],"messages":[147],"with":[148],"k=20":[149],"n=20.":[151],"show":[153],"how":[154],"metrics":[158],"are":[159],"related":[160],"contents":[163],"Conclusions:":[167],"Advances":[168],"text":[170,179],"mining":[171],"enable":[172],"us":[173],"analyze":[175],"large":[176],"masses":[177],"software":[181],"engineering":[182],"but":[183],"non-deterministic":[184],"algorithms,":[185],"such":[186],"as":[187],"LDA,":[188],"unreplicable":[192],"conclusions.":[193],"Our":[194],"approach":[195],"makes":[196],"transparent":[199],"also":[202],"complementary":[203],"rather":[204],"than":[205],"alternative":[206],"many":[208],"prior":[209],"works":[210],"focus":[212],"parameter":[215],"tuning.":[216]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":5}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2018-08-31T00:00:00"}
