{"id":"https://openalex.org/W4393972806","doi":"https://doi.org/10.1109/tmm.2024.3384672","title":"Cross-Modal Recipe Retrieval With Fine-Grained Prompting Alignment and Evidential Semantic Consistency","display_name":"Cross-Modal Recipe Retrieval With Fine-Grained Prompting Alignment and Evidential Semantic Consistency","publication_year":2024,"publication_date":"2024-04-05","ids":{"openalex":"https://openalex.org/W4393972806","doi":"https://doi.org/10.1109/tmm.2024.3384672"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3384672","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3384672","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016659919","display_name":"Xu Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xu Huang","raw_affiliation_strings":["School of Computer Science, Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327072","display_name":"Jin Liu","orcid":"https://orcid.org/0000-0003-0359-0248"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Liu","raw_affiliation_strings":["School of Computer Science, Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100685384","display_name":"Zhizhong Zhang","orcid":"https://orcid.org/0000-0001-6905-4478"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhizhong Zhang","raw_affiliation_strings":["School of Computer Science and Technology, East China Normal University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, East China Normal University, Shanghai, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110042813","display_name":"Yuan Xie","orcid":"https://orcid.org/0000-0001-6945-7437"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Xie","raw_affiliation_strings":["School of Computer Science and Technology, East China Normal University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, East China Normal University, Shanghai, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109012722","display_name":"Yongqiang Tang","orcid":"https://orcid.org/0000-0001-9333-8200"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongqiang Tang","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100414787","display_name":"Wensheng Zhang","orcid":"https://orcid.org/0000-0003-0752-941X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wensheng Zhang","raw_affiliation_strings":["State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041280931","display_name":"Xiaohui Cui","orcid":"https://orcid.org/0000-0001-6079-009X"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohui Cui","raw_affiliation_strings":["School of Cyber Science and Engineering, Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Cyber Science and Engineering, Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5016659919"],"corresponding_institution_ids":["https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":0.735,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.6926176,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"27","issue":null,"first_page":"2783","last_page":"2794"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8177658319473267},{"id":"https://openalex.org/keywords/recipe","display_name":"Recipe","score":0.6176267862319946},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6162100434303284},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5653669238090515},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5457004308700562},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5128505229949951},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.48638680577278137}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8177658319473267},{"id":"https://openalex.org/C2778671685","wikidata":"https://www.wikidata.org/wiki/Q219239","display_name":"Recipe","level":2,"score":0.6176267862319946},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6162100434303284},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5653669238090515},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5457004308700562},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5128505229949951},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.48638680577278137},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C31903555","wikidata":"https://www.wikidata.org/wiki/Q1637030","display_name":"Food science","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3384672","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3384672","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3059242050","display_name":null,"funder_award_id":"61972290","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320312071","display_name":"Ministry of Education, Libya","ror":"https://ror.org/02w030k33"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329781","display_name":"Hubei University","ror":"https://ror.org/03a60m280"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1971955426","https://openalex.org/W2064675550","https://openalex.org/W2194775991","https://openalex.org/W2737041163","https://openalex.org/W2948037078","https://openalex.org/W2963055199","https://openalex.org/W2963997278","https://openalex.org/W2995924296","https://openalex.org/W3034667500","https://openalex.org/W3035032757","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3094502228","https://openalex.org/W3096831136","https://openalex.org/W3137684688","https://openalex.org/W3153694119","https://openalex.org/W3164325351","https://openalex.org/W3170706004","https://openalex.org/W3171668871","https://openalex.org/W3173220247","https://openalex.org/W3185341429","https://openalex.org/W3186223381","https://openalex.org/W3197950609","https://openalex.org/W3198377975","https://openalex.org/W3204119129","https://openalex.org/W3206660842","https://openalex.org/W3207562294","https://openalex.org/W3207798279","https://openalex.org/W3208601917","https://openalex.org/W4226058394","https://openalex.org/W4234587807","https://openalex.org/W4289639868","https://openalex.org/W4292787334","https://openalex.org/W4304080716","https://openalex.org/W4312310776","https://openalex.org/W4312325116","https://openalex.org/W4312784228","https://openalex.org/W4313178921","https://openalex.org/W4385245566","https://openalex.org/W6739651123","https://openalex.org/W6752745768","https://openalex.org/W6767278793","https://openalex.org/W6768698167","https://openalex.org/W6789909235","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6797716411","https://openalex.org/W6798805250"],"related_works":["https://openalex.org/W258429745","https://openalex.org/W3161239248","https://openalex.org/W1584543623","https://openalex.org/W2561508161","https://openalex.org/W3195543079","https://openalex.org/W2098178683","https://openalex.org/W2740680361","https://openalex.org/W3207562294","https://openalex.org/W3187068967","https://openalex.org/W2604742737"],"abstract_inverted_index":{"Alignment":[0,138],"between":[1,39,94],"the":[2,6,19,40,43,50,60,75,78,92,95,105,120,143,157,162,175,185,190,194,200],"food":[3,27,113,124],"images":[4,97,125],"and":[5,31,55,98,126,137,146,189],"corresponding":[7],"recipes":[8,20,51,99],"is":[9,67],"an":[10,167],"emerging":[11],"cross-modal":[12,163,176,195],"representation":[13],"learning":[14],"task.":[15],"In":[16],"this":[17],"task,":[18],"are":[21,84],"composed":[22],"of":[23,42,77,112,123,202],"three":[24],"components,":[25],"i.e.,":[26],"title,":[28],"ingredient":[29],"lists,":[30],"cooking":[32],"instructions,":[33],"which":[34],"require":[35],"a":[36,134],"fine-grained":[37,152],"alignment":[38,93],"features":[41],"two":[44,79,88],"modalities.":[45,80],"Existing":[46],"methods":[47,72,83],"usually":[48],"aggregate":[49],"into":[52],"global":[53,61,96],"embeddings":[54,76,100],"then":[56],"align":[57],"them":[58],"with":[59],"image":[62],"embeddings.":[63],"Meanwhile,":[64],"semantic":[65,121,158,177],"classification":[66,122],"frequently":[68],"used":[69],"in":[70,103,119,161],"these":[71,82,130],"to":[73,116,141,155,173],"regularize":[74,156],"While":[81],"efficient,":[85],"there":[86],"remain":[87],"problems:":[89],"(1)":[90],"Forcing":[91],"may":[101],"result":[102],"losing":[104],"component-specific":[106,149],"information.":[107],"(2)":[108],"The":[109],"high":[110,117],"diversity":[111],"appearance":[114],"leads":[115],"uncertainty":[118],"recipes.":[127],"To":[128],"solve":[129],"problems,":[131],"we":[132,165],"propose":[133],"Fine-grained":[135],"Prompting":[136],"(FPA)":[139],"model":[140],"enhance":[142],"feature":[144],"extraction":[145],"bring":[147],"more":[148],"information":[150,159],"for":[151],"alignment.":[153],"Furthermore,":[154],"contained":[160],"features,":[164],"design":[166],"Evidential":[168],"Semantic":[169],"Consistency":[170],"(ESC)":[171],"loss":[172],"keep":[174],"consistency.":[178],"We":[179],"have":[180],"conducted":[181],"comprehensive":[182],"experiments":[183],"on":[184,193],"benchmark":[186],"dataset":[187],"Recipe1M":[188],"state-of-the-art":[191],"results":[192],"recipe":[196],"retrieval":[197],"task":[198],"demonstrate":[199],"effectiveness":[201],"our":[203],"method.":[204]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
