{"id":"https://openalex.org/W7162771512","doi":"https://doi.org/10.48550/arxiv.2605.29765","title":"MMTM: Tri-Modal Topic Modeling for Long-Form Video via Similarity-Gated Fusion","display_name":"MMTM: Tri-Modal Topic Modeling for Long-Form Video via Similarity-Gated Fusion","publication_year":2026,"publication_date":"2026-05-28","ids":{"openalex":"https://openalex.org/W7162771512","doi":"https://doi.org/10.48550/arxiv.2605.29765"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.29765","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.29765","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.29765","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137326082","display_name":"Ali Abusaleh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abusaleh, Ali","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137386348","display_name":"Bhuvanesh Verma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Verma, Bhuvanesh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137366380","display_name":"Alexander Mehler","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mehler, Alexander","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.27309998869895935,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.27309998869895935,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.15530000627040863,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.1062999963760376,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/german","display_name":"German","score":0.6129000186920166},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5307999849319458},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5123000144958496},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5120999813079834},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.47369998693466187},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.44769999384880066},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.3813999891281128},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.37770000100135803},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.3596000075340271}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7427999973297119},{"id":"https://openalex.org/C154775046","wikidata":"https://www.wikidata.org/wiki/Q188","display_name":"German","level":2,"score":0.6129000186920166},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5637999773025513},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5307999849319458},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5138000249862671},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5123000144958496},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5120999813079834},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49410000443458557},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.47369998693466187},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.44769999384880066},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.3813999891281128},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.37770000100135803},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.3596000075340271},{"id":"https://openalex.org/C8854915","wikidata":"https://www.wikidata.org/wiki/Q4350200","display_name":"Information transfer","level":2,"score":0.34470000863075256},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.33649998903274536},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.32829999923706055},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.30469998717308044},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.28110000491142273},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C2778304055","wikidata":"https://www.wikidata.org/wiki/Q657474","display_name":"Beijing","level":3,"score":0.27889999747276306},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C194232998","wikidata":"https://www.wikidata.org/wiki/Q1606712","display_name":"Transition (genetics)","level":3,"score":0.26930001378059387},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26100000739097595},{"id":"https://openalex.org/C171752962","wikidata":"https://www.wikidata.org/wiki/Q255166","display_name":"Kullback\u2013Leibler divergence","level":2,"score":0.2572999894618988},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.29765","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.29765","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.29765","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.29765","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,103],"introduce":[1],"MMTM,":[2],"a":[3,24,109],"modular":[4],"pipeline":[5,106],"for":[6],"topic":[7,43,114],"discovery":[8],"in":[9],"long-form":[10],"video":[11,113],"that":[12],"integrates":[13],"speech":[14],"recognition,":[15],"audio":[16],"and":[17,20,33,57,68,94,108,120],"visual":[18,118],"embeddings,":[19],"BERTopic":[21],"clustering":[22],"through":[23],"deterministic":[25],"similarity-gated":[26],"fusion.":[27],"Evaluated":[28],"cross-lingually":[29],"on":[30,89],"German":[31,90],"(Tagesschau)":[32],"English":[34],"(NBC)":[35],"broadcast":[36],"news,":[37],"joint":[38],"tri-modal":[39],"modeling":[40],"substantially":[41],"improves":[42,75],"quality:":[44],"noise":[45],"drops":[46],"from":[47,53,61,85],"0.27":[48],"to":[49,55,63,87,98],"0.06,":[50],"transition":[51],"rate":[52],"0.70":[54],"0.21,":[56],"normalized":[58],"entropy":[59],"rises":[60,84],"0.84":[62],"0.92,":[64],"indicating":[65],"more":[66],"coherent":[67],"temporally":[69],"stable":[70],"topics.":[71],"Cluster":[72],"validity":[73],"(Calinski-Harabasz)":[74],"by":[76],"5-12X":[77],"across":[78],"embedding":[79],"spaces.":[80],"Lexical":[81],"coherence":[82],"(NPMI)":[83],"0.77":[86],"0.86":[88],"but":[91],"is":[92],"corpus-dependent":[93],"does":[95],"not":[96],"transfer":[97],"the":[99,105],"shorter":[100],"NBC":[101],"broadcasts.":[102],"release":[104],"code":[107],"human-validated":[110],"54-hour":[111],"multimodal":[112],"corpus":[115],"with":[116],"dual-annotator":[117],"evaluation":[119],"LLM-assisted":[121],"labeling.":[122]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-30T00:00:00"}
