{"id":"https://openalex.org/W4406495728","doi":"https://doi.org/10.1109/bigdata62323.2024.10825030","title":"Visual Summary Thought of Large Vision-Language Models for Multimodal Recommendation","display_name":"Visual Summary Thought of Large Vision-Language Models for Multimodal Recommendation","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406495728","doi":"https://doi.org/10.1109/bigdata62323.2024.10825030"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825030","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825030","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100767516","display_name":"Yuqing Liu","orcid":"https://orcid.org/0000-0001-9828-5646"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuqing Liu","raw_affiliation_strings":["University of Illinois at Chicago,Chicago,IL,United States"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Chicago,Chicago,IL,United States","institution_ids":["https://openalex.org/I39422238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100770991","display_name":"Yu Wang","orcid":"https://orcid.org/0000-0002-0100-5992"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Wang","raw_affiliation_strings":["University of Illinois at Chicago,Chicago,IL,United States"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Chicago,Chicago,IL,United States","institution_ids":["https://openalex.org/I39422238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100552849","display_name":"Yuwei Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuwei Cao","raw_affiliation_strings":["Meta,Menlo Park,CA,United States"],"affiliations":[{"raw_affiliation_string":"Meta,Menlo Park,CA,United States","institution_ids":["https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015105117","display_name":"Lichao Sun","orcid":"https://orcid.org/0000-0003-1539-7939"},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lichao Sun","raw_affiliation_strings":["Lehigh University,Bethlehem,PA,United States"],"affiliations":[{"raw_affiliation_string":"Lehigh University,Bethlehem,PA,United States","institution_ids":["https://openalex.org/I186143895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036357902","display_name":"Philip S. Yu","orcid":"https://orcid.org/0000-0002-3491-5968"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Philip S. Yu","raw_affiliation_strings":["University of Illinois at Chicago,Chicago,IL,United States"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Chicago,Chicago,IL,United States","institution_ids":["https://openalex.org/I39422238"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100767516"],"corresponding_institution_ids":["https://openalex.org/I39422238"],"apc_list":null,"apc_paid":null,"fwci":0.8118,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82868108,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"456","last_page":"461"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.756325364112854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49650460481643677},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4235360324382782},{"id":"https://openalex.org/keywords/visual-language","display_name":"Visual language","score":0.41417139768600464},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3896483778953552},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3418225347995758},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1669623851776123}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.756325364112854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49650460481643677},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4235360324382782},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.41417139768600464},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3896483778953552},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3418225347995758},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1669623851776123},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825030","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825030","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2914304175","https://openalex.org/W2963655167","https://openalex.org/W3156977692","https://openalex.org/W3192113933","https://openalex.org/W3205778609","https://openalex.org/W4306317797","https://openalex.org/W4312583258","https://openalex.org/W4312824836","https://openalex.org/W4313483544","https://openalex.org/W4320024211","https://openalex.org/W4320342563","https://openalex.org/W4322718576","https://openalex.org/W4323706279","https://openalex.org/W4366330503","https://openalex.org/W4376632836","https://openalex.org/W4385567149","https://openalex.org/W4385567794","https://openalex.org/W4386148496","https://openalex.org/W4386289268","https://openalex.org/W4387294588","https://openalex.org/W4388093101","https://openalex.org/W4388482628","https://openalex.org/W4389519587","https://openalex.org/W4389520443","https://openalex.org/W4389768131","https://openalex.org/W4389974595","https://openalex.org/W4390437716","https://openalex.org/W4390489684","https://openalex.org/W4391244746","https://openalex.org/W4392846385","https://openalex.org/W4395075604","https://openalex.org/W4396758674","https://openalex.org/W4400919817","https://openalex.org/W4401397240","https://openalex.org/W4402671548","https://openalex.org/W4403780613","https://openalex.org/W6809646742","https://openalex.org/W6810162553","https://openalex.org/W6851592950","https://openalex.org/W6851950068"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"The":[0],"evolution":[1],"of":[2,13,26,31,58,106,126,133,152],"large":[3],"vision-language":[4],"models":[5],"(LVLMs)":[6],"has":[7],"shed":[8],"light":[9],"on":[10],"the":[11,124,131,150],"development":[12],"many":[14],"fields,":[15],"particularly":[16],"for":[17,93],"multimodal":[18],"recommendation.":[19],"While":[20],"LVLMs":[21,48,63,101],"offer":[22],"an":[23],"integrated":[24],"understanding":[25],"textual":[27,104],"and":[28,55,73,129,147],"visual":[29],"information":[30],"items":[32],"from":[33,51],"user":[34,60],"interactions,":[35],"their":[36],"deployment":[37],"in":[38,76],"this":[39],"domain":[40],"remains":[41],"limited":[42],"due":[43],"to":[44,102,122],"inherent":[45],"complexities.":[46],"First,":[47],"are":[49,116],"trained":[50],"enormous":[52],"general":[53],"datasets":[54,140],"lack":[56],"knowledge":[57],"personalized":[59],"preferences.":[61],"Second,":[62],"struggle":[64],"with":[65,70,119],"multiple":[66],"image":[67],"processing,":[68],"especially":[69],"discrete,":[71],"noisy,":[72],"redundant":[74],"images":[75],"recommendation":[77],"scenarios.":[78],"To":[79],"address":[80],"these":[81],"issues,":[82],"we":[83],"introduce":[84],"a":[85],"new":[86],"reasoning":[87],"strategy":[88],"called":[89],"Visual-Summary":[90],"Thought":[91],"(VST)":[92],"Multimodal":[94],"Recommendation.":[95],"This":[96],"approach":[97],"begins":[98],"by":[99],"prompting":[100],"generate":[103],"summaries":[105,115],"item":[107,120],"images,":[108],"which":[109],"serve":[110],"as":[111],"contextual":[112],"information.":[113],"These":[114],"then":[117],"combined":[118],"titles":[121],"enhance":[123],"representation":[125],"sequential":[127],"interactions":[128],"improve":[130],"ranking":[132],"candidates.":[134],"Our":[135],"experiments,":[136],"conducted":[137],"across":[138],"four":[139],"using":[141],"three":[142],"different":[143],"LVLMs:":[144],"GPT4-V,":[145],"LLaVA-7b,":[146],"LLaVA-13b":[148],"validate":[149],"effectiveness":[151],"VST.":[153]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
