{"id":"https://openalex.org/W4414034987","doi":"https://doi.org/10.1145/3705328.3759303","title":"Describe What You See with Multimodal Large Language Models to Enhance Video Recommendations","display_name":"Describe What You See with Multimodal Large Language Models to Enhance Video Recommendations","publication_year":2025,"publication_date":"2025-09-06","ids":{"openalex":"https://openalex.org/W4414034987","doi":"https://doi.org/10.1145/3705328.3759303"},"language":"en","primary_location":{"id":"doi:10.1145/3705328.3759303","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3705328.3759303","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Nineteenth ACM Conference on Recommender Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2508.09789","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081236916","display_name":"Marco De Nadai","orcid":"https://orcid.org/0000-0001-8466-3933"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Marco De Nadai","raw_affiliation_strings":["Spotify, Copenhagen, Denmark"],"raw_orcid":"https://orcid.org/0000-0001-8466-3933","affiliations":[{"raw_affiliation_string":"Spotify, Copenhagen, Denmark","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114706123","display_name":"Andreas Damianou","orcid":"https://orcid.org/0009-0007-7194-4155"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andreas Damianou","raw_affiliation_strings":["Spotify, Cambridge, United Kingdom"],"raw_orcid":"https://orcid.org/0009-0007-7194-4155","affiliations":[{"raw_affiliation_string":"Spotify, Cambridge, United Kingdom","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002597222","display_name":"Mounia Lalmas","orcid":"https://orcid.org/0000-0002-3531-3096"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mounia Lalmas","raw_affiliation_strings":["Spotify, London, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0002-3531-3096","affiliations":[{"raw_affiliation_string":"Spotify, London, United Kingdom","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5081236916"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34227696,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1159","last_page":"1163"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7410581111907959},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3641440272331238},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.36299633979797363}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7410581111907959},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3641440272331238},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.36299633979797363}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3705328.3759303","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3705328.3759303","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Nineteenth ACM Conference on Recommender Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2508.09789","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.09789","pdf_url":"https://arxiv.org/pdf/2508.09789","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2508.09789","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.09789","pdf_url":"https://arxiv.org/pdf/2508.09789","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W2512971201","https://openalex.org/W2767826403","https://openalex.org/W2963367478","https://openalex.org/W2982108874","https://openalex.org/W3088694469","https://openalex.org/W3093242741","https://openalex.org/W3166751026","https://openalex.org/W3202003978","https://openalex.org/W4290857499","https://openalex.org/W4290927925","https://openalex.org/W4306317504","https://openalex.org/W4312772544","https://openalex.org/W4327656803","https://openalex.org/W4372260310","https://openalex.org/W4395008914","https://openalex.org/W4396723167","https://openalex.org/W4400531346","https://openalex.org/W4400909732","https://openalex.org/W4403221552","https://openalex.org/W4409365832","https://openalex.org/W4409671560"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Existing":[0],"video":[1,200],"recommender":[2],"systems":[3],"rely":[4],"primarily":[5],"on":[6,10,26,55],"user-defined":[7],"metadata":[8,178],"or":[9,58],"low-level":[11,21],"visual":[12],"and":[13,37,128,137,149,156,177],"acoustic":[14],"signals":[15],"extracted":[16],"by":[17,103],"specialised":[18],"encoders.":[19],"These":[20],"features":[22,179],"describe":[23],"what":[24],"appears":[25],"the":[27,64,100,132,160,187],"screen":[28],"but":[29],"miss":[30],"deeper":[31],"semantics":[32,98],"such":[33],"as":[34,192],"intent,":[35],"humour,":[36],"world":[38],"knowledge":[39,194],"that":[40,95],"make":[41],"clips":[42],"resonate":[43],"with":[44,125,144,167],"viewers.":[45],"For":[46],"example,":[47],"is":[48],"a":[49,53,56,89,117,145],"30-second":[50],"clip":[51,115],"simply":[52],"singer":[54],"rooftop,":[57],"an":[59,105],"ironic":[60],"parody":[61,124],"filmed":[62],"amid":[63],"fairy":[65],"chimneys":[66],"of":[67,189],"Cappadocia,":[68],"Turkey?":[69],"Such":[70],"distinctions":[71],"are":[72],"critical":[73],"to":[74,80,112,196],"personalised":[75],"recommendations":[76],"yet":[77],"remain":[78],"invisible":[79],"traditional":[81],"encoding":[82],"pipelines.":[83],"In":[84],"this":[85],"paper,":[86],"we":[87],"introduce":[88],"simple,":[90],"recommendation":[91,101],"system-agnostic":[92],"zero-finetuning":[93],"framework":[94,171],"injects":[96],"high-level":[97],"into":[99,116,152],"pipeline":[102],"prompting":[104],"off-the-shelf":[106],"Multimodal":[107],"Large":[108],"Language":[109],"Model":[110],"(MLLM)":[111],"summarise":[113],"each":[114],"rich":[118],"natural-language":[119],"description":[120],"(e.g.":[121],"\"a":[122],"superhero":[123],"slapstick":[126],"fights":[127],"orchestral":[129],"stabs\"),":[130],"bridging":[131],"gap":[133],"between":[134],"raw":[135],"content":[136],"user":[138,165],"intent.":[139],"We":[140],"use":[141],"MLLM":[142],"output":[143],"state-of-the-art":[146],"text":[147],"encoder":[148],"feed":[150],"it":[151],"standard":[153],"collaborative,":[154],"content-based,":[155],"generative":[157],"recommenders.":[158,201],"On":[159],"MicroLens-100K":[161],"dataset,":[162],"which":[163],"emulates":[164],"interactions":[166],"TikTok-style":[168],"videos,":[169],"our":[170],"consistently":[172],"surpasses":[173],"conventional":[174],"video,":[175],"audio,":[176],"in":[180],"five":[181],"representative":[182],"models.":[183],"Our":[184],"findings":[185],"highlight":[186],"promise":[188],"leveraging":[190],"MLLMs":[191],"on-the-fly":[193],"extractors":[195],"build":[197],"more":[198],"intent-aware":[199]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
