{"id":"https://openalex.org/W4390493621","doi":"https://doi.org/10.1109/snams60348.2023.10375440","title":"Pushing Boundaries: Exploring Zero Shot Object Classification with Large Multimodal Models","display_name":"Pushing Boundaries: Exploring Zero Shot Object Classification with Large Multimodal Models","publication_year":2023,"publication_date":"2023-11-21","ids":{"openalex":"https://openalex.org/W4390493621","doi":"https://doi.org/10.1109/snams60348.2023.10375440"},"language":"en","primary_location":{"id":"doi:10.1109/snams60348.2023.10375440","is_oa":false,"landing_page_url":"https://doi.org/10.1109/snams60348.2023.10375440","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 Tenth International Conference on Social Networks Analysis, Management and Security (SNAMS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027517223","display_name":"Ashhadul Islam","orcid":"https://orcid.org/0000-0002-9717-3252"},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":true,"raw_author_name":"Ashhadul Islam","raw_affiliation_strings":["College of Science and Engineering, Hamad Bin Khalifa University,Doha,Qatar","College of Science and Engineering, Hamad Bin Khalifa University, Doha, Qatar"],"raw_orcid":"https://orcid.org/0000-0002-9717-3252","affiliations":[{"raw_affiliation_string":"College of Science and Engineering, Hamad Bin Khalifa University,Doha,Qatar","institution_ids":["https://openalex.org/I4210144839"]},{"raw_affiliation_string":"College of Science and Engineering, Hamad Bin Khalifa University, Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051403530","display_name":"Md. Rafiul Biswas","orcid":"https://orcid.org/0000-0002-5145-1990"},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Md. Rafiul Biswas","raw_affiliation_strings":["College of Science and Engineering, Hamad Bin Khalifa University,Doha,Qatar","College of Science and Engineering, Hamad Bin Khalifa University, Doha, Qatar"],"raw_orcid":"https://orcid.org/0000-0002-5145-1990","affiliations":[{"raw_affiliation_string":"College of Science and Engineering, Hamad Bin Khalifa University,Doha,Qatar","institution_ids":["https://openalex.org/I4210144839"]},{"raw_affiliation_string":"College of Science and Engineering, Hamad Bin Khalifa University, Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047390057","display_name":"Wajdi Zaghouani","orcid":"https://orcid.org/0000-0003-1521-5568"},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Wajdi Zaghouani","raw_affiliation_strings":["College of Humanities and Social Sciences, Hamad Bin Khalifa University,Doha,Qatar","College of Humanities and Social Sciences, Hamad Bin Khalifa University, Doha, Qatar"],"raw_orcid":"https://orcid.org/0000-0003-1521-5568","affiliations":[{"raw_affiliation_string":"College of Humanities and Social Sciences, Hamad Bin Khalifa University,Doha,Qatar","institution_ids":["https://openalex.org/I4210144839"]},{"raw_affiliation_string":"College of Humanities and Social Sciences, Hamad Bin Khalifa University, Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053691175","display_name":"Samir Brahim Belhaouari","orcid":"https://orcid.org/0000-0003-2336-0490"},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Samir Brahim Belhaouari","raw_affiliation_strings":["College of Science and Engineering, Hamad Bin Khalifa University,Doha,Qatar","College of Science and Engineering, Hamad Bin Khalifa University, Doha, Qatar"],"raw_orcid":"https://orcid.org/0000-0003-2336-0490","affiliations":[{"raw_affiliation_string":"College of Science and Engineering, Hamad Bin Khalifa University,Doha,Qatar","institution_ids":["https://openalex.org/I4210144839"]},{"raw_affiliation_string":"College of Science and Engineering, Hamad Bin Khalifa University, Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081437624","display_name":"Zubair Shah","orcid":"https://orcid.org/0000-0001-7389-3274"},"institutions":[{"id":"https://openalex.org/I4210144839","display_name":"Hamad bin Khalifa University","ror":"https://ror.org/03eyq4y97","country_code":"QA","type":"education","lineage":["https://openalex.org/I4210144839"]}],"countries":["QA"],"is_corresponding":false,"raw_author_name":"Zubair Shah","raw_affiliation_strings":["College of Science and Engineering, Hamad Bin Khalifa University,Doha,Qatar","College of Science and Engineering, Hamad Bin Khalifa University, Doha, Qatar"],"raw_orcid":"https://orcid.org/0000-0001-7389-3274","affiliations":[{"raw_affiliation_string":"College of Science and Engineering, Hamad Bin Khalifa University,Doha,Qatar","institution_ids":["https://openalex.org/I4210144839"]},{"raw_affiliation_string":"College of Science and Engineering, Hamad Bin Khalifa University, Doha, Qatar","institution_ids":["https://openalex.org/I4210144839"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5027517223"],"corresponding_institution_ids":["https://openalex.org/I4210144839"],"apc_list":null,"apc_paid":null,"fwci":0.8242,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.75847668,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7609184384346008},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7357864379882812},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6730881929397583},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5995270609855652},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.5428656339645386},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.4681915044784546},{"id":"https://openalex.org/keywords/mnist-database","display_name":"MNIST database","score":0.4631812572479248},{"id":"https://openalex.org/keywords/transformative-learning","display_name":"Transformative learning","score":0.4345965087413788},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.4244082570075989},{"id":"https://openalex.org/keywords/contextual-image-classification","display_name":"Contextual image classification","score":0.41776782274246216},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3720521330833435},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.319923460483551}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7609184384346008},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7357864379882812},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6730881929397583},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5995270609855652},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.5428656339645386},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.4681915044784546},{"id":"https://openalex.org/C190502265","wikidata":"https://www.wikidata.org/wiki/Q17069496","display_name":"MNIST database","level":3,"score":0.4631812572479248},{"id":"https://openalex.org/C70587473","wikidata":"https://www.wikidata.org/wiki/Q7834111","display_name":"Transformative learning","level":2,"score":0.4345965087413788},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.4244082570075989},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.41776782274246216},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3720521330833435},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.319923460483551},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/snams60348.2023.10375440","is_oa":false,"landing_page_url":"https://doi.org/10.1109/snams60348.2023.10375440","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 Tenth International Conference on Social Networks Analysis, Management and Security (SNAMS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.75,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2007339694","https://openalex.org/W2963518342","https://openalex.org/W3166396011","https://openalex.org/W4242011677","https://openalex.org/W4281613651","https://openalex.org/W4281885183","https://openalex.org/W4366330503","https://openalex.org/W4375870049","https://openalex.org/W4385019259","https://openalex.org/W4385336911","https://openalex.org/W4385391943","https://openalex.org/W4386045865","https://openalex.org/W4402727764"],"related_works":["https://openalex.org/W4394785709","https://openalex.org/W4296978181","https://openalex.org/W2912987408","https://openalex.org/W2937381246","https://openalex.org/W3004801820","https://openalex.org/W4281672036","https://openalex.org/W4313444753","https://openalex.org/W4230582276","https://openalex.org/W4387163578","https://openalex.org/W4206451978"],"abstract_inverted_index":{"The":[0,52,133],"synergy":[1],"of":[2,54,71,135,146,184,186,201,221],"language":[3,48],"and":[4,13,49,123,150,189,223],"vision":[5,36],"models":[6,16,33],"has":[7],"given":[8],"rise":[9],"to":[10,19,193,206],"Large":[11,39,55],"Language":[12,40],"Vision":[14],"Assistant":[15],"(LLVAs),":[17],"designed":[18,93],"engage":[20],"users":[21],"in":[22,46,63,85,227],"rich":[23],"conversational":[24],"experiences":[25],"intertwined":[26],"with":[27,38,188,213],"image-based":[28],"queries.":[29],"These":[30,210],"comprehensive":[31],"multimodal":[32],"seamlessly":[34],"integrate":[35],"encoders":[37],"Models":[41,57],"(LLMs),":[42],"expanding":[43],"their":[44,83,224],"applications":[45,226],"general-purpose":[47],"visual":[50],"comprehension.":[51],"advent":[53],"Multimodal":[56],"(LMMs)":[58],"heralds":[59],"a":[60,77,108,180,198],"new":[61],"era":[62],"Artificial":[64],"Intelligence":[65],"(AI)":[66],"assistance,":[67],"extending":[68],"the":[69,100,139,153,165,195,218],"horizons":[70],"AI":[72],"utilization.":[73],"This":[74],"paper":[75],"takes":[76],"unique":[78],"perspective":[79],"on":[80],"LMMs,":[81],"exploring":[82],"efficacy":[84],"performing":[86],"image":[87],"classification":[88,144],"tasks":[89],"using":[90],"tailored":[91],"prompts":[92],"for":[94,152,170],"specific":[95,171],"datasets.":[96],"We":[97],"also":[98],"investigate":[99],"LLVAs":[101,222],"zero-shot":[102],"learning":[103],"capabilities.":[104],"Our":[105],"study":[106],"includes":[107],"benchmarking":[109],"analysis":[110],"across":[111],"four":[112],"diverse":[113],"datasets:":[114],"MNIST,":[115],"Cats":[116],"Vs.":[117,121,129],"Dogs,":[118],"Hymnoptera":[119],"(Ants":[120],"Bees),":[122],"an":[124],"unconventional":[125],"dataset":[126,181],"comprising":[127,182],"Pox":[128],"Non-Pox":[130],"skin":[131],"images.":[132],"results":[134],"our":[136,161,214],"experiments":[137],"demonstrate":[138],"model's":[140,166],"remarkable":[141],"performance,":[142],"achieving":[143],"accuracies":[145],"85%,":[147],"100%,":[148],"77%,":[149],"79%":[151],"respective":[154],"datasets":[155],"without":[156,190],"any":[157],"fine-tuning.":[158,209],"To":[159],"bolster":[160],"analysis,":[162],"we":[163],"assess":[164],"performance":[167],"post":[168,208],"fine-tuning":[169,176],"tasks.":[172],"In":[173],"one":[174],"instance,":[175],"is":[177],"conducted":[178],"over":[179],"images":[183],"faces":[185],"children":[187],"autism.":[191],"Prior":[192],"fine-tuning,":[194],"model":[196],"demonstrated":[197],"test":[199],"accuracy":[200],"55%,":[202],"which":[203],"significantly":[204],"improved":[205],"83%":[207],"results,":[211],"coupled":[212],"prior":[215],"findings,":[216],"underscore":[217],"transformative":[219],"potential":[220],"versatile":[225],"real-world":[228],"scenarios.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
