{"id":"https://openalex.org/W7131434261","doi":"https://doi.org/10.48550/arxiv.2602.19079","title":"TriTopic: Tri-Modal Graph-Based Topic Modeling with Iterative Refinement and Archetypes","display_name":"TriTopic: Tri-Modal Graph-Based Topic Modeling with Iterative Refinement and Archetypes","publication_year":2026,"publication_date":"2026-02-22","ids":{"openalex":"https://openalex.org/W7131434261","doi":"https://doi.org/10.48550/arxiv.2602.19079"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.19079","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19079","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.19079","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006138536","display_name":"Roman Egger","orcid":"https://orcid.org/0000-0003-4888-6026"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Egger, Roman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":["https://openalex.org/A5006138536"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.5285000205039978,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.5285000205039978,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.10499999672174454,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.08590000122785568,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.6328999996185303},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5892000198364258},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5748999714851379},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.492900013923645},{"id":"https://openalex.org/keywords/iterative-and-incremental-development","display_name":"Iterative and incremental development","score":0.47290000319480896},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.4277999997138977},{"id":"https://openalex.org/keywords/iterative-refinement","display_name":"Iterative refinement","score":0.42239999771118164},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.39980000257492065}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7649000287055969},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.6328999996185303},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5892000198364258},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5748999714851379},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.492900013923645},{"id":"https://openalex.org/C143587482","wikidata":"https://www.wikidata.org/wiki/Q1543216","display_name":"Iterative and incremental development","level":2,"score":0.47290000319480896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4325000047683716},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.4277999997138977},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.42239999771118164},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.39980000257492065},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38580000400543213},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3675000071525574},{"id":"https://openalex.org/C2780273121","wikidata":"https://www.wikidata.org/wiki/Q109411","display_name":"Curse","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3495999872684479},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.34389999508857727},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.34290000796318054},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.3345000147819519},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2957000136375427},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27900001406669617},{"id":"https://openalex.org/C76363472","wikidata":"https://www.wikidata.org/wiki/Q1437394","display_name":"Formal concept analysis","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2558000087738037},{"id":"https://openalex.org/C170133592","wikidata":"https://www.wikidata.org/wiki/Q1806883","display_name":"Latent semantic analysis","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.19079","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19079","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.19079","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.19079","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Topic":[0],"modeling":[1],"extracts":[2],"latent":[3],"themes":[4],"from":[5],"large":[6],"text":[7],"collections,":[8],"but":[9],"leading":[10],"approaches":[11],"like":[12],"BERTopic":[13],"face":[14],"critical":[15],"limitations:":[16],"stochastic":[17],"instability,":[18],"loss":[19],"of":[20,74],"lexical":[21],"precision":[22],"(\"Embedding":[23],"Blur\"),":[24],"and":[25,49,63,70,83,120,150],"reliance":[26],"on":[27,127],"a":[28,35,42],"single":[29],"data":[30],"perspective.":[31],"We":[32],"present":[33],"TriTopic,":[34],"framework":[36],"that":[37,86],"addresses":[38],"these":[39],"weaknesses":[40],"through":[41,89],"tri-modal":[43],"graph":[44,58],"fusing":[45],"semantic":[46],"embeddings,":[47],"TF-IDF,":[48],"metadata.":[50],"Three":[51],"core":[52],"innovations":[53],"drive":[54],"its":[55],"performance:":[56],"hybrid":[57],"construction":[59],"via":[60],"Mutual":[61],"kNN":[62],"Shared":[64],"Nearest":[65],"Neighbors":[66],"to":[67],"eliminate":[68],"noise":[69],"combat":[71],"the":[72,95,124],"curse":[73],"dimensionality;":[75],"Consensus":[76],"Leiden":[77],"Clustering":[78],"for":[79,135,138,141],"reproducible,":[80],"stable":[81],"partitions;":[82],"Iterative":[84],"Refinement":[85],"sharpens":[87],"embeddings":[88],"dynamic":[90],"centroid-pulling.":[91],"TriTopic":[92,122],"also":[93],"replaces":[94],"\"average":[96],"document\"":[97],"concept":[98],"with":[99,147],"archetype-based":[100],"topic":[101],"representations":[102],"defined":[103],"by":[104],"boundary":[105],"cases":[106],"rather":[107],"than":[108],"centers":[109],"alone.":[110],"In":[111],"benchmarks":[112],"across":[113],"20":[114],"Newsgroups,":[115],"BBC":[116],"News,":[117,119],"AG":[118],"Arxiv,":[121],"achieves":[123],"highest":[125],"NMI":[126,131],"every":[128],"dataset":[129],"(mean":[130],"0.575":[132],"vs.":[133],"0.513":[134],"BERTopic,":[136],"0.416":[137],"NMF,":[139],"0.299":[140],"LDA),":[142],"guarantees":[143],"100%":[144],"corpus":[145],"coverage":[146],"0%":[148],"outliers,":[149],"is":[151],"available":[152],"as":[153],"an":[154],"open-source":[155],"PyPI":[156],"library.":[157]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-02-26T00:00:00"}
