{"id":"https://openalex.org/W7133319145","doi":"https://doi.org/10.48550/arxiv.2603.01945","title":"When Numbers Tell Half the Story: Human-Metric Alignment in Topic Model Evaluation","display_name":"When Numbers Tell Half the Story: Human-Metric Alignment in Topic Model Evaluation","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133319145","doi":"https://doi.org/10.48550/arxiv.2603.01945"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.01945","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01945","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.01945","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066641077","display_name":"Thibault Prouteau","orcid":"https://orcid.org/0000-0001-9610-3191"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Prouteau, Thibault","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017621115","display_name":"Francis Lareau","orcid":"https://orcid.org/0000-0002-0352-5246"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lareau, Francis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060620618","display_name":"Nicolas Dugu\u00e9","orcid":"https://orcid.org/0000-0002-6150-1939"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dugu\u00e9, Nicolas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Lamirel, Jean-Charles","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lamirel, Jean-Charles","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128017002","display_name":"Christophe Malaterre","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Malaterre, Christophe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5066641077"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.8813999891281128,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.8813999891281128,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.029400000348687172,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.009700000286102295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.6151999831199646},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6151000261306763},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.5777000188827515},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.5770000219345093},{"id":"https://openalex.org/keywords/thematic-structure","display_name":"Thematic structure","score":0.47870001196861267},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.44510000944137573},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4041999876499176},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.38760000467300415}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8251000046730042},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6383000016212463},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.6151999831199646},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6151000261306763},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6119999885559082},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.5777000188827515},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.5770000219345093},{"id":"https://openalex.org/C2778109090","wikidata":"https://www.wikidata.org/wiki/Q7781195","display_name":"Thematic structure","level":2,"score":0.47870001196861267},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.44510000944137573},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4041999876499176},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.38760000467300415},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.38280001282691956},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36419999599456787},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.34940001368522644},{"id":"https://openalex.org/C175293574","wikidata":"https://www.wikidata.org/wiki/Q697133","display_name":"Word lists by frequency","level":3,"score":0.3192000091075897},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3034999966621399},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C2985684807","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Text generation","level":2,"score":0.288100004196167},{"id":"https://openalex.org/C93692415","wikidata":"https://www.wikidata.org/wiki/Q1502030","display_name":"Thematic map","level":2,"score":0.28769999742507935},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.2734000086784363},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.26429998874664307},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.2621999979019165},{"id":"https://openalex.org/C95318506","wikidata":"https://www.wikidata.org/wiki/Q6588467","display_name":"Textual entailment","level":3,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.01945","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01945","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.01945","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01945","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Topic":[0,61],"models":[1,107],"uncover":[2],"latent":[3],"thematic":[4],"structures":[5],"in":[6,16,155],"text":[7],"corpora,":[8],"yet":[9],"evaluating":[10],"their":[11],"quality":[12],"remains":[13],"challenging,":[14],"particularly":[15,154,193],"specialized":[17,156],"domains.":[18],"Existing":[19],"methods":[20,126],"often":[21],"rely":[22],"on":[23,55,92,128],"automated":[24,121,189],"metrics":[25,122,149],"like":[26],"topic":[27,106],"coherence":[28,94,148],"and":[29,52,95,111,147,158,176,190],"diversity,":[30],"which":[31],"may":[32],"not":[33,151],"fully":[34],"align":[35,167],"with":[36,123,168],"human":[37,67,124,191],"judgment.":[38],"Human":[39],"evaluation":[40,68,125,186],"tasks,":[41],"such":[42],"as":[43],"word":[44,80,89,145],"intrusion,":[45],"provide":[46],"valuable":[47],"insights":[48],"but":[49],"are":[50],"costly":[51],"primarily":[53],"validated":[54],"general-domain":[56],"corpora.":[57,196],"This":[58,180],"paper":[59],"introduces":[60],"Word":[62],"Mixing":[63],"(TWM),":[64],"a":[65,97,133],"novel":[66],"task":[69,177],"assessing":[70],"inter-topic":[71],"distinctness":[72,163],"by":[73],"testing":[74],"whether":[75],"annotators":[76],"can":[77],"distinguish":[78],"between":[79],"sets":[81],"from":[82,132],"single":[83],"or":[84],"mixed":[85],"topics.":[86],"TWM":[87,160],"complements":[88],"intrusion's":[90],"focus":[91],"intra-topic":[93],"provides":[96],"human-grounded":[98],"counterpart":[99],"to":[100,166],"diversity":[101,169],"metrics.":[102,170],"We":[103,171],"evaluate":[104],"six":[105],"-":[108,119],"both":[109],"statistical":[110],"embedding-based":[112],"(LDA,":[113],"NMF,":[114],"Top2Vec,":[115],"BERTopic,":[116],"CFMF,":[117],"CFMF-emb)":[118],"comparing":[120],"based":[127],"nearly":[129],"4,000":[130],"annotations":[131],"domain-specific":[134,195],"corpus":[135],"of":[136,138],"philosophy":[137],"science":[139],"publications.":[140],"Our":[141],"findings":[142],"reveal":[143],"that":[144,159],"intrusion":[146],"do":[150],"always":[152],"align,":[153],"domains,":[157],"captures":[161],"human-perceived":[162],"while":[164],"appearing":[165],"release":[172],"the":[173,183],"annotated":[174],"dataset":[175],"generation":[178],"code.":[179],"work":[181],"highlights":[182],"need":[184],"for":[185,194],"frameworks":[187],"bridging":[188],"assessments,":[192]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-04T00:00:00"}
