{"id":"https://openalex.org/W4390523405","doi":"https://doi.org/10.1145/3595916.3626389","title":"Cross-modal Image-Recipe Retrieval via Multimodal Fusion","display_name":"Cross-modal Image-Recipe Retrieval via Multimodal Fusion","publication_year":2023,"publication_date":"2023-12-06","ids":{"openalex":"https://openalex.org/W4390523405","doi":"https://doi.org/10.1145/3595916.3626389"},"language":"en","primary_location":{"id":"doi:10.1145/3595916.3626389","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626389","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626389","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626389","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101703729","display_name":"Lijie Li","orcid":"https://orcid.org/0009-0003-6380-6714"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lijie Li","raw_affiliation_strings":["Harbin Engineering University, China"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University, China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076153264","display_name":"Caiyue Hu","orcid":"https://orcid.org/0009-0006-5891-6267"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Caiyue Hu","raw_affiliation_strings":["Harbin Engineering University, China"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University, China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075197319","display_name":"Haitao Zhang","orcid":"https://orcid.org/0000-0002-0291-1372"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haitao Zhang","raw_affiliation_strings":["Harbin Engineering University, China"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University, China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037713610","display_name":"Akshita Maradapu Vera Venkata Sai","orcid":null},"institutions":[{"id":"https://openalex.org/I4322298","display_name":"Towson University","ror":"https://ror.org/044w7a341","country_code":"US","type":"education","lineage":["https://openalex.org/I4322298"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Akshita Maradapu Vera Venkata sai","raw_affiliation_strings":["Towson University, USA"],"affiliations":[{"raw_affiliation_string":"Towson University, USA","institution_ids":["https://openalex.org/I4322298"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101703729"],"corresponding_institution_ids":["https://openalex.org/I151727225"],"apc_list":null,"apc_paid":null,"fwci":0.596,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.70416672,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recipe","display_name":"Recipe","score":0.9345947504043579},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7687543630599976},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5808499455451965},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5722262859344482},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5569776296615601},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5193411111831665},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5058936476707458},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4512150287628174},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4501219093799591},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4357547163963318},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4098990559577942},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3920353949069977},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34285232424736023}],"concepts":[{"id":"https://openalex.org/C2778671685","wikidata":"https://www.wikidata.org/wiki/Q219239","display_name":"Recipe","level":2,"score":0.9345947504043579},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7687543630599976},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5808499455451965},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5722262859344482},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5569776296615601},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5193411111831665},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5058936476707458},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4512150287628174},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4501219093799591},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4357547163963318},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4098990559577942},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3920353949069977},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34285232424736023},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C31903555","wikidata":"https://www.wikidata.org/wiki/Q1637030","display_name":"Food science","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3595916.3626389","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626389","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626389","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3595916.3626389","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626389","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626389","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4577689320","display_name":null,"funder_award_id":"62072136","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8199206254","display_name":null,"funder_award_id":"2020YFB1710200","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390523405.pdf","grobid_xml":"https://content.openalex.org/works/W4390523405.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W1905882502","https://openalex.org/W1933349210","https://openalex.org/W2194775991","https://openalex.org/W2737041163","https://openalex.org/W2897152025","https://openalex.org/W2962964995","https://openalex.org/W2963055199","https://openalex.org/W2981073781","https://openalex.org/W3004153761","https://openalex.org/W3005971801","https://openalex.org/W3009290009","https://openalex.org/W3035032757","https://openalex.org/W3094293870","https://openalex.org/W3132583427","https://openalex.org/W3135367836","https://openalex.org/W3137684688","https://openalex.org/W3164325351","https://openalex.org/W3197950609","https://openalex.org/W4294658815"],"related_works":["https://openalex.org/W258429745","https://openalex.org/W3161239248","https://openalex.org/W1584543623","https://openalex.org/W2561508161","https://openalex.org/W3195543079","https://openalex.org/W2098178683","https://openalex.org/W2740680361","https://openalex.org/W3187068967","https://openalex.org/W4301143707","https://openalex.org/W2952745240"],"abstract_inverted_index":{"Cross-modal":[0],"image-recipe":[1,86,180],"retrieval":[2,21,168,178],"aims":[3],"to":[4,55,67,78,102,122,142,157,175],"capture":[5,31],"the":[6,30,38,49,108,113,131,144,162,166,186,192,195,204,211],"correlation":[7],"between":[8,42,161],"food":[9],"images":[10,148],"and":[11,36,70,83,110,126,149,206,220],"recipes.":[12,150],"While":[13],"existing":[14],"methods":[15],"have":[16],"demonstrated":[17],"good":[18],"performance":[19,200],"on":[20,120,185,202],"tasks,":[22],"they":[23],"often":[24],"overlook":[25],"two":[26,163],"crucial":[27],"aspects:":[28],"(1)":[29],"of":[32,40,100,146,179,194,216],"fine-grained":[33],"recipe":[34,69,94,132],"information":[35],"(2)":[37],"consideration":[39],"correlations":[41],"embeddings":[43],"from":[44,112],"different":[45],"modalities.":[46,164],"We":[47],"introduce":[48],"Multimodal":[50],"Fusion":[51],"Retrieval":[52],"Framework":[53],"(MFRF)":[54],"address":[56],"these":[57],"issues.":[58],"The":[59,93,134],"proposed":[60,196],"framework":[61],"utilizes":[62,97],"a":[63,75,171],"deep":[64],"learning-based":[65],"encoder":[66],"process":[68],"image":[71],"data":[72],"effectively,":[73],"incorporates":[74,138],"fusion":[76,136],"network":[77],"learn":[79],"cross-modal":[80,154,167,177],"semantic":[81],"alignment,":[82],"ultimately":[84],"achieves":[85],"retrieval.":[87],"MFRF":[88],"comprises":[89],"three":[90],"integral":[91],"modules.":[92],"preprocessing":[95],"module":[96,137,169],"various":[98],"levels":[99],"Transformer":[101],"extract":[103],"essential":[104],"features":[105],"such":[106],"as":[107],"title":[109],"ingredients":[111],"recipe.":[114],"Additionally,":[115],"it":[116,152],"employs":[117,170],"LSTM":[118],"based":[119],"BERT":[121],"establish":[123],"contextual":[124],"relationships":[125],"dependencies":[127],"among":[128],"sentences":[129],"in":[130],"instructions.":[133],"multimodal":[135],"visual-linguistic":[139],"contrastive":[140],"losses":[141],"align":[143],"representations":[145],"both":[147,203],"Moreover,":[151],"leverages":[153],"attention":[155],"mechanisms":[156],"facilitate":[158],"effective":[159],"interaction":[160],"Lastly,":[165],"triple":[172],"loss":[173],"function":[174],"enable":[176],"pairs.":[181],"Experimental":[182],"evaluations":[183],"conducted":[184],"widely-used":[187],"Recipe1M":[188],"benchmark":[189],"dataset":[190],"demonstrate":[191],"effectiveness":[193],"MFRF,":[197],"achieving":[198],"substantial":[199],"improvements":[201],"1k":[205],"10k":[207],"test":[208],"sets.":[209],"Specifically,":[210],"results":[212],"indicate":[213],"an":[214],"increase":[215],"+9.9%":[217],"(64.8":[218],"R@1)":[219,223],"+8.4%":[221],"(33.7":[222],"respectively.":[224]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
