{"id":"https://openalex.org/W7126155022","doi":"https://doi.org/10.3390/informatics13020024","title":"Exploring Scientific Literature Using Topic Modeling: A Practical Framework for Discovery and Classification","display_name":"Exploring Scientific Literature Using Topic Modeling: A Practical Framework for Discovery and Classification","publication_year":2026,"publication_date":"2026-01-30","ids":{"openalex":"https://openalex.org/W7126155022","doi":"https://doi.org/10.3390/informatics13020024"},"language":"en","primary_location":{"id":"doi:10.3390/informatics13020024","is_oa":true,"landing_page_url":"https://doi.org/10.3390/informatics13020024","pdf_url":null,"source":{"id":"https://openalex.org/S2738238905","display_name":"Informatics","issn_l":"2227-9709","issn":["2227-9709"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.3390/informatics13020024","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018021622","display_name":"Amir Alipour Yengejeh","orcid":null},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Amir Alipour Yengejeh","raw_affiliation_strings":["Department of Statistics and Data Science, University of Central Florida, Orlando, FL 32816, USA"],"raw_orcid":"https://orcid.org/0009-0002-1424-5589","affiliations":[{"raw_affiliation_string":"Department of Statistics and Data Science, University of Central Florida, Orlando, FL 32816, USA","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091539370","display_name":"Larry Tang","orcid":"https://orcid.org/0009-0000-7749-6196"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Larry Tang","raw_affiliation_strings":["Department of Statistics and Data Science, University of Central Florida, Orlando, FL 32816, USA","National Center for Forensic Science, University of Central Florida, P.O. Box 162367, Orlando, FL 32816, USA"],"raw_orcid":"https://orcid.org/0000-0002-7276-155X","affiliations":[{"raw_affiliation_string":"Department of Statistics and Data Science, University of Central Florida, Orlando, FL 32816, USA","institution_ids":["https://openalex.org/I106165777"]},{"raw_affiliation_string":"National Center for Forensic Science, University of Central Florida, P.O. Box 162367, Orlando, FL 32816, USA","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091762322","display_name":"Candice Bridge","orcid":"https://orcid.org/0000-0002-0472-3025"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Candice M. Bridge","raw_affiliation_strings":["Department of Chemistry, University of Central Florida, Orlando, FL 32816, USA","National Center for Forensic Science, University of Central Florida, P.O. Box 162367, Orlando, FL 32816, USA"],"raw_orcid":"https://orcid.org/0000-0002-0472-3025","affiliations":[{"raw_affiliation_string":"Department of Chemistry, University of Central Florida, Orlando, FL 32816, USA","institution_ids":["https://openalex.org/I106165777"]},{"raw_affiliation_string":"National Center for Forensic Science, University of Central Florida, P.O. Box 162367, Orlando, FL 32816, USA","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059542025","display_name":"Chandra Kundu","orcid":"https://orcid.org/0000-0001-8967-0593"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chandra Kundu","raw_affiliation_strings":["Department of Statistics and Data Science, University of Central Florida, Orlando, FL 32816, USA"],"raw_orcid":"https://orcid.org/0000-0001-8967-0593","affiliations":[{"raw_affiliation_string":"Department of Statistics and Data Science, University of Central Florida, Orlando, FL 32816, USA","institution_ids":["https://openalex.org/I106165777"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5018021622"],"corresponding_institution_ids":["https://openalex.org/I106165777"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":39.0459,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.99247944,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"13","issue":"2","first_page":"24","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.3668000102043152,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.3668000102043152,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.10899999737739563,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.08980000019073486,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.7448999881744385},{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.6500999927520752},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4749000072479248},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.46129998564720154},{"id":"https://openalex.org/keywords/scientific-literature","display_name":"Scientific literature","score":0.4404999911785126},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.43560001254081726},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.4327999949455261},{"id":"https://openalex.org/keywords/thematic-structure","display_name":"Thematic structure","score":0.43160000443458557},{"id":"https://openalex.org/keywords/bayesian-inference","display_name":"Bayesian inference","score":0.35589998960494995}],"concepts":[{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.7448999881744385},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.666700005531311},{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.6500999927520752},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5202000141143799},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4912000000476837},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4749000072479248},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.46129998564720154},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.4404999911785126},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.43560001254081726},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.4327999949455261},{"id":"https://openalex.org/C2778109090","wikidata":"https://www.wikidata.org/wiki/Q7781195","display_name":"Thematic structure","level":2,"score":0.43160000443458557},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3910999894142151},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.37950000166893005},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.35589998960494995},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.35420000553131104},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3465000092983246},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34459999203681946},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3328000009059906},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.32919999957084656},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C2780479914","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Document classification","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C2781280628","wikidata":"https://www.wikidata.org/wiki/Q5280766","display_name":"Dirichlet process","level":3,"score":0.26179999113082886},{"id":"https://openalex.org/C178315738","wikidata":"https://www.wikidata.org/wiki/Q603441","display_name":"Bibliometrics","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C547195049","wikidata":"https://www.wikidata.org/wiki/Q1725664","display_name":"Terminology","level":2,"score":0.2597000002861023},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.25220000743865967},{"id":"https://openalex.org/C93692415","wikidata":"https://www.wikidata.org/wiki/Q1502030","display_name":"Thematic map","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/informatics13020024","is_oa":true,"landing_page_url":"https://doi.org/10.3390/informatics13020024","pdf_url":null,"source":{"id":"https://openalex.org/S2738238905","display_name":"Informatics","issn_l":"2227-9709","issn":["2227-9709"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Informatics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:40f81aeedc5c4497bc1612ddb3d38924","is_oa":true,"landing_page_url":"https://doaj.org/article/40f81aeedc5c4497bc1612ddb3d38924","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Informatics, Vol 13, Iss 2, p 24 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/informatics13020024","is_oa":true,"landing_page_url":"https://doi.org/10.3390/informatics13020024","pdf_url":null,"source":{"id":"https://openalex.org/S2738238905","display_name":"Informatics","issn_l":"2227-9709","issn":["2227-9709"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Informatics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,100],"increasing":[1],"volume":[2],"and":[3,12,16,67,97,114,119,138,167,176],"diversity":[4],"of":[5,46,86,136,143],"scientific":[6,37,180],"publications":[7],"poses":[8],"challenges":[9],"for":[10,107,171],"scalable":[11],"interpretable":[13],"topic":[14,29,62,81,152,157],"discovery":[15],"automated":[17],"document":[18],"categorization.":[19],"This":[20],"study":[21],"proposes":[22],"an":[23,134],"integrated":[24],"framework":[25,162],"that":[26],"combines":[27],"probabilistic":[28],"modeling":[30],"with":[31,75,83,147],"supervised":[32,108],"classification":[33],"to":[34,56,156],"support":[35],"large-scale":[36],"literature":[38,172],"analysis.":[39],"Using":[40],"3689":[41],"abstracts":[42],"from":[43],"the":[44,84,91,117,160],"Journal":[45],"Forensic":[47],"Sciences":[48],"(2009\u20132022),":[49],"Latent":[50],"Dirichlet":[51],"Allocation":[52],"(LDA)":[53],"is":[54,123],"applied":[55],"uncover":[57],"latent":[58],"thematic":[59],"structures,":[60],"assess":[61],"diagnosticity":[63],"across":[64],"forensic":[65],"disciplines,":[66],"analyze":[68],"temporal":[69],"research":[70],"trends.":[71],"Bayesian":[72],"model":[73],"selection":[74],"repeated":[76],"resampling":[77,115],"identifies":[78],"a":[79,126,139,164],"stable":[80,121],"resolution,":[82],"number":[85],"topics":[87],"T":[88],"lying":[89],"in":[90,179],"range":[92],"83\u201388,":[93],"yielding":[94],"semantically":[95],"coherent":[96],"discipline-aligned":[98],"topics.":[99],"resulting":[101],"document\u2013topic":[102],"representations":[103],"are":[104],"then":[105],"used":[106],"abstract":[109],"classification.":[110],"Across":[111],"multiple":[112],"models":[113],"scenarios,":[116],"strongest":[118],"most":[120],"performance":[122],"achieved":[124],"under":[125],"Grouped":[127],"Category":[128],"configuration.":[129],"In":[130],"particular,":[131],"XGBoost":[132],"attains":[133],"Accuracy":[135],"0.754":[137],"Macro-averaged":[140],"F1":[141],"score":[142],"0.737":[144],"at":[145,150],"T=88,":[146],"comparable":[148],"results":[149],"neighboring":[151],"counts,":[153],"indicating":[154],"robustness":[155],"granularity.":[158],"Overall,":[159],"proposed":[161],"provides":[163],"reproducible,":[165],"interpretable,":[166],"computationally":[168],"efficient":[169],"pipeline":[170],"organization,":[173],"trend":[174],"analysis,":[175],"metadata":[177],"enhancement":[178],"domains.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-02-01T00:00:00"}
