{"id":"https://openalex.org/W4415541295","doi":"https://doi.org/10.1145/3746027.3755734","title":"RecipeRAG: Advancing Recipe Generation with Reinforced Retrieval Augmented Generation","display_name":"RecipeRAG: Advancing Recipe Generation with Reinforced Retrieval Augmented Generation","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415541295","doi":"https://doi.org/10.1145/3746027.3755734"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755734","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755734","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jinghan Yang","orcid":"https://orcid.org/0009-0001-9868-4556"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jinghan Yang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021459992","display_name":"Zhenbo Xu","orcid":"https://orcid.org/0000-0002-8948-1589"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenbo Xu","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China and ShiFang Technology Inc., Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China and ShiFang Technology Inc., Hangzhou, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112262580","display_name":"Davtian Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dehua Ma","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100324254","display_name":"Liu Liu","orcid":"https://orcid.org/0000-0001-8730-5824"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liu Liu","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101783051","display_name":"Fei Liu","orcid":"https://orcid.org/0000-0003-0266-6896"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Liu","raw_affiliation_strings":["Zhejiang University, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Zhejiang, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103472415","display_name":"Gong Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gong Huang","raw_affiliation_strings":["Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057318866","display_name":"Zhaofeng He","orcid":"https://orcid.org/0000-0002-3433-8435"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaofeng He","raw_affiliation_strings":["Beijing University of Post and Telecommunication, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Post and Telecommunication, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30036258,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5060","last_page":"5069"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9648000001907349,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recipe","display_name":"Recipe","score":0.9639999866485596},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.7014999985694885},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6758999824523926},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5565999746322632},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4894999861717224},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4659999907016754},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.44359999895095825},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.37439998984336853}],"concepts":[{"id":"https://openalex.org/C2778671685","wikidata":"https://www.wikidata.org/wiki/Q219239","display_name":"Recipe","level":2,"score":0.9639999866485596},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7925000190734863},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7132999897003174},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.7014999985694885},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6758999824523926},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.586899995803833},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5565999746322632},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4894999861717224},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4659999907016754},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.44359999895095825},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.37439998984336853},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.36739999055862427},{"id":"https://openalex.org/C2780589914","wikidata":"https://www.wikidata.org/wiki/Q10675206","display_name":"Ingredient","level":2,"score":0.3427000045776367},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.3301999866962433},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.30219998955726624},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.275299996137619},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2538999915122986},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755734","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755734","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8061379860","display_name":null,"funder_award_id":"No.62176025,No. 62301066,No. 62206012,No. 62406028","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2737041163","https://openalex.org/W2948037078","https://openalex.org/W2954801189","https://openalex.org/W2963055199","https://openalex.org/W2963997278","https://openalex.org/W3035032757","https://openalex.org/W3120867192","https://openalex.org/W3137684688","https://openalex.org/W4292787334","https://openalex.org/W4319996536","https://openalex.org/W4388189856","https://openalex.org/W4388778348","https://openalex.org/W4394596514","https://openalex.org/W4400453116","https://openalex.org/W4403841197","https://openalex.org/W4403841656"],"related_works":[],"abstract_inverted_index":{"Generating":[0],"accurate":[1],"recipes":[2,51,125,152],"from":[3,46],"dish":[4],"images":[5,47],"is":[6],"a":[7,12,98],"challenging":[8],"task":[9],"that":[10,249],"requires":[11],"deep":[13,81],"understanding":[14],"of":[15,36,76,176,212,235],"food":[16],"categories,":[17],"ingredient":[18,62],"combinations,":[19],"cooking":[20],"methods,":[21],"and":[22,48,56,109,116,141,147,163,181,258],"context.":[23],"Current":[24],"works":[25],"mainly":[26],"rely":[27],"on":[28,53,243],"the":[29,73,77,90,111,117,128,158,167,174,177,183,191,197,209,215,232,244,264],"two-stage":[30],"training":[31,78,194],"method":[32],"or":[33],"supervised":[34,112],"fine-tuning":[35,113,119],"vision-language":[37],"models":[38,41],"(VLMs).":[39],"Two-stage":[40],"typically":[42],"first":[43],"predict":[44],"ingredients":[45,55],"then":[49,154],"generate":[50,226],"based":[52],"both":[54,138],"images.":[57],"However,":[58,166],"accumulated":[59],"errors":[60],"in":[61,89,237,255],"prediction":[63],"often":[64,170],"lead":[65],"to":[66,86,127,134,156,172,207,225],"inaccurate":[67],"recipes.":[68,92],"Fine-tuning":[69],"VLMs":[70,236],"only":[71],"fit":[72],"statistical":[74],"patterns":[75],"data,":[79],"lacking":[80],"reasoning":[82,210],"capabilities,":[83],"which":[84],"leads":[85],"severe":[87],"hallucinations":[88],"generated":[91,159,201],"In":[93],"this":[94],"paper,":[95],"we":[96,131,188],"introduce":[97],"novel":[99],"reinforced":[100],"retrieval-augmented":[101],"generation":[102,239,257],"framework":[103],"named":[104],"RecipeRAG":[105,250],"for":[106,205],"recipe":[107,179,185,238,256],"generation,":[108],"compare":[110],"(SFT)":[114],"paradigm":[115],"reinforcement":[118,216],"(RFT)":[120],"paradigm.":[121,266],"To":[122],"effectively":[123],"retrieve":[124],"relevant":[126],"query":[129],"image,":[130],"improve":[132],"CLIP":[133],"obtain":[135],"IR-CLIP":[136],"as":[137],"our":[139],"retriever":[140],"re-ranker":[142],"by":[143],"integrating":[144],"metric":[145],"learning":[146,217],"contrastive":[148],"learning.":[149],"The":[150],"retrieved":[151,178],"are":[153],"used":[155],"enhance":[157],"results,":[160],"improving":[161],"accuracy":[162],"reducing":[164],"hallucinations.":[165],"SFT":[168,206],"VLM":[169],"fails":[171],"judge":[173],"quality":[175],"information":[180],"perform":[182],"complex":[184],"generation.":[186],"Therefore,":[187],"furthermore":[189],"investigate":[190],"two-phase":[192],"RFT":[193],"framework.":[195],"Firstly,":[196],"cold-start":[198],"phase":[199,218],"uses":[200],"Chain-of-Thought":[202],"(CoT)":[203],"data":[204],"activate":[208],"capabilities":[211],"VLMs.":[213],"Then,":[214],"utilizes":[219],"Group":[220],"Relative":[221],"Policy":[222],"Optimization":[223],"(GRPO)":[224],"multiple":[227],"reasoning-answer":[228],"pairs,":[229],"further":[230],"enhancing":[231],"generalization":[233,261],"ability":[234,262],"tasks.":[240],"Extensive":[241],"evaluations":[242],"large-scale":[245],"Recipe1M":[246],"dataset":[247],"demonstrate":[248],"outperforms":[251],"all":[252],"previous":[253],"methods":[254],"exhibits":[259],"strong":[260],"under":[263],"RL":[265]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-25T00:00:00"}
