{"id":"https://openalex.org/W4403790991","doi":"https://doi.org/10.1145/3664647.3681332","title":"XMeCap: Meme Caption Generation with Sub-Image Adaptability","display_name":"XMeCap: Meme Caption Generation with Sub-Image Adaptability","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403790991","doi":"https://doi.org/10.1145/3664647.3681332"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681332","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681332","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060201245","display_name":"Yuyan Chen","orcid":"https://orcid.org/0009-0003-5888-5518"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuyan Chen","raw_affiliation_strings":["Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111333834","display_name":"Songzhou Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Songzhou Yan","raw_affiliation_strings":["Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102856192","display_name":"Zhihong Zhu","orcid":"https://orcid.org/0009-0001-4530-5516"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhihong Zhu","raw_affiliation_strings":["Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065529268","display_name":"Zhixu Li","orcid":"https://orcid.org/0000-0003-2355-288X"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhixu Li","raw_affiliation_strings":["Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090455375","display_name":"Yanghua Xiao","orcid":"https://orcid.org/0000-0001-8403-9591"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanghua Xiao","raw_affiliation_strings":["Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5060201245"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":0.9971,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.77824258,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3352","last_page":"3361"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.800011157989502},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6945823431015015},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5343363285064697},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5276347994804382},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5048268437385559},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.35880184173583984}],"concepts":[{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.800011157989502},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6945823431015015},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5343363285064697},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5276347994804382},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5048268437385559},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.35880184173583984},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681332","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3664647.3681332","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W2001432772","https://openalex.org/W2076576639","https://openalex.org/W2101105183","https://openalex.org/W2187930957","https://openalex.org/W2600463316","https://openalex.org/W2740887992","https://openalex.org/W2798778974","https://openalex.org/W2808936280","https://openalex.org/W2941932279","https://openalex.org/W2954841306","https://openalex.org/W3000666430","https://openalex.org/W3120554508","https://openalex.org/W3176724088","https://openalex.org/W3205255970","https://openalex.org/W3207721564","https://openalex.org/W3211356157","https://openalex.org/W4280510162","https://openalex.org/W4321485324","https://openalex.org/W4386076084","https://openalex.org/W4387848667","https://openalex.org/W4387848774","https://openalex.org/W4389518867","https://openalex.org/W4389518961","https://openalex.org/W4392384808","https://openalex.org/W4393160465","https://openalex.org/W4400585812","https://openalex.org/W4402670784","https://openalex.org/W4402670786","https://openalex.org/W4402671740","https://openalex.org/W4402683962","https://openalex.org/W4402684058","https://openalex.org/W6671225572","https://openalex.org/W6911516946","https://openalex.org/W6949129192"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Humor,":[0],"deeply":[1],"rooted":[2],"in":[3,20,28,75,95,143,153,158],"societal":[4],"meanings":[5],"and":[6,64,78,83,101,121,132,155],"cultural":[7],"details,":[8],"poses":[9,38],"a":[10,29,39,57,92,140,159],"unique":[11],"challenge":[12],"for":[13,98,118,123],"machines.":[14],"While":[15],"advances":[16],"have":[17],"been":[18],"made":[19],"natural":[21],"language":[22],"processing,":[23],"real-world":[24],"humor":[25,157],"often":[26],"thrives":[27],"multi-modal":[30,160],"context,":[31],"encapsulated":[32],"distinctively":[33],"by":[34,130],"memes.":[35],"This":[36,135],"paper":[37],"particular":[40],"emphasis":[41],"on":[42,47,68],"the":[43,54,127,149],"impact":[44],"of":[45,116,151],"multi-images":[46],"meme":[48,108],"captioning.":[49],"After":[50],"that,":[51],"we":[52],"introduce":[53],"XMeCap":[55,110],"framework,":[56],"novel":[58],"approach":[59],"that":[60],"adopts":[61],"supervised":[62],"fine-tuning":[63],"reinforcement":[65],"learning":[66],"based":[67],"an":[69,112],"innovative":[70],"reward":[71],"model,":[72],"which":[73],"factors":[74],"both":[76,99],"global":[77],"local":[79],"similarities":[80],"between":[81],"visuals":[82],"text.":[84],"Our":[85],"results,":[86],"benchmarked":[87],"against":[88],"contemporary":[89],"models,":[90],"manifest":[91],"marked":[93],"improvement":[94],"caption":[96],"generation":[97],"single-image":[100,119],"multi-image":[102,124],"memes,":[103,125],"as":[104,106],"well":[105],"different":[107],"categories.":[109],"achieves":[111],"average":[113],"evaluation":[114],"score":[115],"75.85":[117],"memes":[120],"66.32":[122],"outperforming":[126],"best":[128],"baseline":[129],"3.71%":[131],"4.82%,":[133],"respectively.":[134],"research":[136],"not":[137],"only":[138],"establishes":[139],"new":[141],"frontier":[142],"meme-related":[144],"studies":[145],"but":[146],"also":[147],"underscores":[148],"potential":[150],"machines":[152],"understanding":[154],"generating":[156],"setting.":[161]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
