{"id":"https://openalex.org/W4403674158","doi":"https://doi.org/10.15439/2024f1593","title":"Topic Modeling of the SrpELTeC Corpus: A Comparison of NMF, LDA, and BERTopic","display_name":"Topic Modeling of the SrpELTeC Corpus: A Comparison of NMF, LDA, and BERTopic","publication_year":2024,"publication_date":"2024-10-23","ids":{"openalex":"https://openalex.org/W4403674158","doi":"https://doi.org/10.15439/2024f1593"},"language":"en","primary_location":{"id":"doi:10.15439/2024f1593","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2024f1593","pdf_url":"https://annals-csis.org/Volume_39/drp/pdf/1593.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://annals-csis.org/Volume_39/drp/pdf/1593.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092938121","display_name":"Teodora Mihajlov","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Teodora Mihajlov","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062254354","display_name":"Milica Ikoni\u0107 Ne\u0161i\u0107","orcid":"https://orcid.org/0000-0002-0835-8889"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Milica Ikoni\u0107 Ne\u0161i\u0107","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000158514","display_name":"Ranka Stankovi\u0107","orcid":"https://orcid.org/0000-0001-5123-6273"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ranka Stankovi\u0107","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5081680669","display_name":"Olivera Kitanovi\u0107","orcid":"https://orcid.org/0000-0002-7571-2729"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olivera Kitanovi\u0107","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5092938121"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.6288,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.85736434,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"39","issue":null,"first_page":"649","last_page":"653"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8910999894142151,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8910999894142151,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10102","display_name":"scientometrics and bibliometrics research","score":0.886900007724762,"subfield":{"id":"https://openalex.org/subfields/1804","display_name":"Statistics, Probability and Uncertainty"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.8450999855995178,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6681030988693237},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6406410932540894},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5852124691009521},{"id":"https://openalex.org/keywords/non-negative-matrix-factorization","display_name":"Non-negative matrix factorization","score":0.4901352822780609},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.33404210209846497},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.11732232570648193},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.05283212661743164},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.04671740531921387}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6681030988693237},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6406410932540894},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5852124691009521},{"id":"https://openalex.org/C152671427","wikidata":"https://www.wikidata.org/wiki/Q10843505","display_name":"Non-negative matrix factorization","level":4,"score":0.4901352822780609},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.33404210209846497},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.11732232570648193},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.05283212661743164},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.04671740531921387},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.15439/2024f1593","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2024f1593","pdf_url":"https://annals-csis.org/Volume_39/drp/pdf/1593.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:doaj.org/article:5efe688950c04c57bb229e98fdf36331","is_oa":true,"landing_page_url":"https://doaj.org/article/5efe688950c04c57bb229e98fdf36331","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Annals of computer science and information systems, Vol 39, Pp 649-653 (2024)","raw_type":"article"},{"id":"pmh:oai:dr.rgf.bg.ac.rs:9167","is_oa":false,"landing_page_url":"http://doi.org/10.15439/2024F1593","pdf_url":null,"source":{"id":"https://openalex.org/S7407055281","display_name":"Dr RGF - RGF Repository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"\u043e\u0431\u0458\u0430\u0432\u0459\u0435\u043d\u0430"}],"best_oa_location":{"id":"doi:10.15439/2024f1593","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2024f1593","pdf_url":"https://annals-csis.org/Volume_39/drp/pdf/1593.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320329307","display_name":"Science Fund of the Republic of Serbia","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403674158.pdf","grobid_xml":"https://content.openalex.org/works/W4403674158.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4399648768","https://openalex.org/W2101428145","https://openalex.org/W4297433825","https://openalex.org/W4318261659","https://openalex.org/W2122823937","https://openalex.org/W3215340255","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Topic":[0,78,82,113,124],"modeling":[1,56],"is":[2],"an":[3],"effective":[4],"way":[5],"to":[6,33,55],"gain":[7],"insight":[8],"into":[9],"large":[10],"amounts":[11],"of":[12,14,41,91],"data.Some":[13],"the":[15,39,49,71,92,104,111,122],"most":[16],"widely":[17],"used":[18],"topic":[19],"models":[20,43],"are":[21],"Latent":[22],"Dirichlet":[23],"allocation":[24],"(LDA)":[25],"and":[26,44,64,81,85,138],"Nonnegative":[27],"Matrix":[28],"Factorization":[29],"(NMF).However,":[30],"new":[31],"ways":[32],"mine":[34],"topics":[35],"have":[36],"emerged":[37],"with":[38,103,118],"rise":[40],"self-attention":[42],"pretrained":[45],"language":[46],"models.BERTopic":[47],"represents":[48],"current":[50],"stateof-the-art":[51],"when":[52],"it":[53],"comes":[54],"topics.In":[57],"this":[58],"paper,":[59],"we":[60,97],"compared":[61,98],"LDA,":[62],"NMF,":[63],"BERTopic":[65,117],"performance":[66],"on":[67],"literary":[68],"texts":[69],"in":[70,135],"Serbian":[72,127],"language,":[73],"both":[74,136],"quantitatively":[75],"by":[76,86],"measuring":[77],"Coherency":[79,114],"(TC)":[80],"Diversity":[83],"(TD),":[84],"conducting":[87],"a":[88],"qualitative":[89],"evaluation":[90],"obtained":[93],"topics.Additionally,":[94],"for":[95,108],"BERTopic,":[96],"multilingual":[99],"sentence":[100,132],"transformer":[101,133],"embeddings":[102,107,120,129,134],"Jerteh-355":[105,119,128],"monolingual":[106,126],"Serbian.NMF":[109],"yielded":[110],"best":[112,123],"results,":[115],"while":[116],"gave":[121],"Diveristy.The":[125],"also":[130],"outperformed":[131],"TC":[137],"TD.":[139]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":7}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
