{"id":"https://openalex.org/W7147207511","doi":"https://doi.org/10.48550/arxiv.2603.26772","title":"From Content to Audience: A Multimodal Annotation Framework for Broadcast Television Analytics","display_name":"From Content to Audience: A Multimodal Annotation Framework for Broadcast Television Analytics","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7147207511","doi":"https://doi.org/10.48550/arxiv.2603.26772"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.26772","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.26772","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132555547","display_name":"Paolo Cupini","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cupini, Paolo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5013385420","display_name":"Francesco Pierri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pierri, Francesco","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5132555547"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.7653999924659729,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.7653999924659729,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07119999825954437,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.02459999918937683,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6966000199317932},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.641700029373169},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5351999998092651},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.4350000023841858},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.37929999828338623},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.3440999984741211},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.3296000063419342}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8087000250816345},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6966000199317932},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.641700029373169},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5351999998092651},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.4350000023841858},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.41110000014305115},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3840000033378601},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.37929999828338623},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3458999991416931},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.3440999984741211},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3296000063419342},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3271999955177307},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.3215999901294708},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3181999921798706},{"id":"https://openalex.org/C2778598663","wikidata":"https://www.wikidata.org/wiki/Q1407599","display_name":"Video content analysis","level":4,"score":0.31119999289512634},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2833999991416931},{"id":"https://openalex.org/C91285054","wikidata":"https://www.wikidata.org/wiki/Q1144899","display_name":"Broadcast television systems","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C2983174267","wikidata":"https://www.wikidata.org/wiki/Q3775098","display_name":"Video retrieval","level":2,"score":0.26649999618530273},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.26772","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.26772","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26772","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Automated":[0],"semantic":[1,78],"annotation":[2,57],"of":[3,55,73,193,205],"broadcast":[4,61,171],"television":[5,62],"content":[6,86],"presents":[7,51],"distinctive":[8],"challenges,":[9],"combining":[10,119],"structured":[11],"audiovisual":[12],"composition,":[13],"domain-specific":[14,71],"editorial":[15],"patterns,":[16],"and":[17,40,88,111,127,197],"strict":[18],"operational":[19,203],"constraints.":[20],"While":[21],"multimodal":[22,56,154],"large":[23],"language":[24],"models":[25,141,148],"(MLLMs)":[26],"have":[27],"demonstrated":[28],"strong":[29],"general-purpose":[30],"video":[31,135],"understanding":[32],"capabilities,":[33],"their":[34],"comparative":[35],"effectiveness":[36],"across":[37,76,98],"pipeline":[38,94,165],"architectures":[39,95],"input":[41,117,136],"configurations":[42],"in":[43,64],"broadcast-specific":[44],"settings":[45],"remains":[46],"empirically":[47],"undercharacterized.":[48],"This":[49,188],"paper":[50],"a":[52,70],"systematic":[53],"evaluation":[54],"pipelines":[58],"applied":[59],"to":[60,158],"news":[63],"the":[65,163,202,206],"Italian":[66,185],"setting.":[67],"We":[68],"construct":[69],"benchmark":[72],"clips":[74],"labeled":[75],"four":[77],"dimensions:":[79],"visual":[80,120],"environment":[81],"classification,":[82,84],"topic":[83],"sensitive":[85],"detection,":[87],"named":[89],"entity":[90],"recognition.":[91],"Two":[92],"different":[93],"are":[96,137],"evaluated":[97],"nine":[99],"frontier":[100],"models,":[101],"including":[102],"Gemini":[103],"3.0":[104],"Pro,":[105],"LLaMA":[106],"4":[107],"Maverick,":[108],"Qwen-VL":[109],"variants,":[110],"Gemma":[112],"3,":[113],"under":[114,152],"progressively":[115],"enriched":[116],"strategies":[118],"signals,":[121],"automatic":[122],"speech":[123],"recognition,":[124],"speaker":[125],"diarization,":[126],"metadata.":[128],"Experimental":[129],"results":[130],"demonstrate":[131],"that":[132],"gains":[133],"from":[134],"strongly":[138],"model-dependent:":[139],"larger":[140],"effectively":[142],"leverage":[143],"temporal":[144],"continuity,":[145],"while":[146],"smaller":[147],"show":[149],"performance":[150],"degradation":[151],"extended":[153],"context,":[155],"likely":[156],"due":[157],"token":[159],"overload.":[160],"Beyond":[161],"benchmarking,":[162],"selected":[164],"is":[166],"deployed":[167],"on":[168],"14":[169],"full":[170],"episodes,":[172],"with":[173,177],"minute-level":[174],"annotations":[175],"integrated":[176],"normalized":[178],"audience":[179,195,211],"measurement":[180],"data":[181],"provided":[182],"by":[183],"an":[184],"media":[186],"company.":[187],"integration":[189],"enables":[190],"correlational":[191],"analysis":[192],"topic-level":[194],"sensitivity":[196],"generational":[198],"engagement":[199],"divergence,":[200],"demonstrating":[201],"viability":[204],"proposed":[207],"framework":[208],"for":[209],"content-based":[210],"analytics.":[212]},"counts_by_year":[],"updated_date":"2026-04-02T13:53:19.096889","created_date":"2026-04-02T00:00:00"}
