{"id":"https://openalex.org/W4400444056","doi":"https://doi.org/10.3390/make6030074","title":"Navigating the Multimodal Landscape: A Review on Integration of Text and Image Data in Machine Learning Architectures","display_name":"Navigating the Multimodal Landscape: A Review on Integration of Text and Image Data in Machine Learning Architectures","publication_year":2024,"publication_date":"2024-07-09","ids":{"openalex":"https://openalex.org/W4400444056","doi":"https://doi.org/10.3390/make6030074"},"language":"en","primary_location":{"id":"doi:10.3390/make6030074","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make6030074","pdf_url":"https://www.mdpi.com/2504-4990/6/3/74/pdf?version=1720518949","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-4990/6/3/74/pdf?version=1720518949","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076953008","display_name":"Maisha Binte Rashid","orcid":"https://orcid.org/0009-0009-4781-5593"},"institutions":[{"id":"https://openalex.org/I157394403","display_name":"Baylor University","ror":"https://ror.org/005781934","country_code":"US","type":"education","lineage":["https://openalex.org/I157394403"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maisha Binte Rashid","raw_affiliation_strings":["Department of Computer Science, Baylor University, Waco, TX 76706, USA"],"raw_orcid":"https://orcid.org/0009-0009-4781-5593","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Baylor University, Waco, TX 76706, USA","institution_ids":["https://openalex.org/I157394403"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039308586","display_name":"Md Shahidur Rahaman","orcid":"https://orcid.org/0009-0009-4472-5541"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Md Shahidur Rahaman","raw_affiliation_strings":["Department of Computer Science, Texas A&M University, College Station, TX 77843, USA"],"raw_orcid":"https://orcid.org/0009-0009-4472-5541","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Texas A&M University, College Station, TX 77843, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045360354","display_name":"Pablo Rivas","orcid":"https://orcid.org/0000-0002-8690-0987"},"institutions":[{"id":"https://openalex.org/I157394403","display_name":"Baylor University","ror":"https://ror.org/005781934","country_code":"US","type":"education","lineage":["https://openalex.org/I157394403"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Pablo Rivas","raw_affiliation_strings":["Department of Computer Science, Baylor University, Waco, TX 76706, USA"],"raw_orcid":"https://orcid.org/0000-0002-8690-0987","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Baylor University, Waco, TX 76706, USA","institution_ids":["https://openalex.org/I157394403"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5045360354"],"corresponding_institution_ids":["https://openalex.org/I157394403"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":3.0125,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.9248643,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"6","issue":"3","first_page":"1545","last_page":"1563"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8108338713645935},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6452987194061279},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.558609664440155},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4851723313331604},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4636003077030182},{"id":"https://openalex.org/keywords/concatenation","display_name":"Concatenation (mathematics)","score":0.4387586712837219}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8108338713645935},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6452987194061279},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.558609664440155},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4851723313331604},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4636003077030182},{"id":"https://openalex.org/C87619178","wikidata":"https://www.wikidata.org/wiki/Q126002","display_name":"Concatenation (mathematics)","level":2,"score":0.4387586712837219},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/make6030074","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make6030074","pdf_url":"https://www.mdpi.com/2504-4990/6/3/74/pdf?version=1720518949","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:783c570368f349d5a77501df3c7bcd35","is_oa":false,"landing_page_url":"https://doaj.org/article/783c570368f349d5a77501df3c7bcd35","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction, Vol 6, Iss 3, Pp 1545-1563 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/make6030074","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make6030074","pdf_url":"https://www.mdpi.com/2504-4990/6/3/74/pdf?version=1720518949","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2351599477","display_name":null,"funder_award_id":"2210091","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7182044910","display_name":null,"funder_award_id":"2136961","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4400444056.pdf"},"referenced_works_count":82,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2619383789","https://openalex.org/W2765401382","https://openalex.org/W2897729462","https://openalex.org/W2936150952","https://openalex.org/W2963633722","https://openalex.org/W2966715458","https://openalex.org/W2967127151","https://openalex.org/W2970231061","https://openalex.org/W2985076077","https://openalex.org/W2995167577","https://openalex.org/W2998875866","https://openalex.org/W3000092951","https://openalex.org/W3002085040","https://openalex.org/W3003417734","https://openalex.org/W3011727199","https://openalex.org/W3021459865","https://openalex.org/W3034727271","https://openalex.org/W3084911298","https://openalex.org/W3088631780","https://openalex.org/W3105354225","https://openalex.org/W3111911126","https://openalex.org/W3114540572","https://openalex.org/W3118085629","https://openalex.org/W3119970235","https://openalex.org/W3120315535","https://openalex.org/W3126337491","https://openalex.org/W3126799815","https://openalex.org/W3127184348","https://openalex.org/W3135367836","https://openalex.org/W3162249403","https://openalex.org/W3163610861","https://openalex.org/W3168463823","https://openalex.org/W3169040021","https://openalex.org/W3170251192","https://openalex.org/W3173220247","https://openalex.org/W3173395783","https://openalex.org/W3174027106","https://openalex.org/W3176063860","https://openalex.org/W3184679245","https://openalex.org/W3196315368","https://openalex.org/W3202464651","https://openalex.org/W4200097352","https://openalex.org/W4200189367","https://openalex.org/W4200314231","https://openalex.org/W4211119877","https://openalex.org/W4221046698","https://openalex.org/W4225323055","https://openalex.org/W4281480469","https://openalex.org/W4281703735","https://openalex.org/W4283722442","https://openalex.org/W4284882302","https://openalex.org/W4285362180","https://openalex.org/W4285404671","https://openalex.org/W4285815569","https://openalex.org/W4293193531","https://openalex.org/W4308885870","https://openalex.org/W4310584540","https://openalex.org/W4311935989","https://openalex.org/W4312411705","https://openalex.org/W4312551640","https://openalex.org/W4312763117","https://openalex.org/W4312781831","https://openalex.org/W4312784228","https://openalex.org/W4312937942","https://openalex.org/W4313413261","https://openalex.org/W4313591130","https://openalex.org/W4317514677","https://openalex.org/W4324117717","https://openalex.org/W4361732189","https://openalex.org/W4375857401","https://openalex.org/W4379046666","https://openalex.org/W4379116392","https://openalex.org/W4379745834","https://openalex.org/W4379881562","https://openalex.org/W4382053310","https://openalex.org/W4382999323","https://openalex.org/W4384207668","https://openalex.org/W4384945912","https://openalex.org/W6810334672","https://openalex.org/W6848859580","https://openalex.org/W6851914854"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Images":[0],"and":[1,22,29,35,43,66,70,88,92,97,106,112,119,164,217],"text":[2,31,69,87,163],"have":[3,122,141],"become":[4],"essential":[5],"parts":[6],"of":[7,83,199,243],"the":[8,104,197,226,241],"multimodal":[9,200],"machine":[10],"learning":[11],"(MMML)":[12],"framework":[13],"in":[14,76,151,179,236],"today\u2019s":[15],"world":[16],"because":[17],"data":[18,72,90],"are":[19,168,192],"always":[20],"available,":[21],"technological":[23],"breakthroughs":[24],"bring":[25],"disparate":[26],"forms":[27],"together,":[28],"while":[30],"adds":[32],"semantic":[33],"richness":[34],"narrative":[36],"to":[37,103,132,148,175,188,195,231,239],"images,":[38,167],"images":[39],"capture":[40],"visual":[41],"subtleties":[42],"emotions.":[44],"Together,":[45],"these":[46,203,233],"two":[47],"media":[48],"improve":[49,240],"knowledge":[50],"beyond":[51],"what":[52],"would":[53],"be":[54],"possible":[55],"with":[56],"just":[57],"one":[58],"revolutionary":[59],"application.":[60],"This":[61],"paper":[62],"investigates":[63],"feature":[64,172],"extraction":[65,173],"advancement":[67],"from":[68,127,185],"image":[71,89],"using":[73],"pre-trained":[74,157],"models":[75,206],"MMML.":[77],"It":[78],"offers":[79],"a":[80,136],"thorough":[81,137],"analysis":[82],"fusion":[84],"architectures,":[85],"outlining":[86],"integration":[91],"evaluating":[93],"their":[94,176],"overall":[95],"advantages":[96],"effects.":[98],"Furthermore,":[99],"it":[100],"draws":[101],"attention":[102,190],"shortcomings":[105],"difficulties":[107],"that":[108,115,145,156],"MMML":[109,150,205,244],"currently":[110],"faces":[111],"guides":[113],"areas":[114],"need":[116],"more":[117],"research":[118,125,143,230],"development.":[120],"We":[121],"gathered":[123],"341":[124],"articles":[126],"five":[128],"digital":[129],"library":[130],"databases":[131],"accomplish":[133],"this.":[134],"Following":[135],"assessment":[138],"procedure,":[139],"we":[140],"88":[142],"papers":[144],"enable":[146],"us":[147],"evaluate":[149],"detail.":[152],"Our":[153,223],"findings":[154,224],"demonstrate":[155],"models,":[158],"such":[159],"as":[160],"BERT":[161],"for":[162,166,171,228],"ResNet":[165],"predominantly":[169],"employed":[170],"due":[174],"robust":[177],"performance":[178],"diverse":[180],"applications.":[181],"Fusion":[182],"techniques,":[183],"ranging":[184],"simple":[186],"concatenation":[187],"advanced":[189],"mechanisms,":[191],"extensively":[193],"adopted":[194],"enhance":[196],"representation":[198],"data.":[201],"Despite":[202],"advancements,":[204],"face":[207],"significant":[208],"challenges,":[209,234],"including":[210],"handling":[211],"noisy":[212],"data,":[213],"optimizing":[214],"dataset":[215],"size,":[216],"ensuring":[218],"robustness":[219,242],"against":[220],"adversarial":[221],"attacks.":[222],"highlight":[225],"necessity":[227],"further":[229],"address":[232],"particularly":[235],"developing":[237],"methods":[238],"models.":[245]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":3}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
