{"id":"https://openalex.org/W7131078371","doi":"https://doi.org/10.1109/iccvw69036.2025.00637","title":"ByDeWay: Boost Your Multimodal LLM with DEpth Prompting in a Training-Free Way","display_name":"ByDeWay: Boost Your Multimodal LLM with DEpth Prompting in a Training-Free Way","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W7131078371","doi":"https://doi.org/10.1109/iccvw69036.2025.00637"},"language":null,"primary_location":{"id":"doi:10.1109/iccvw69036.2025.00637","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00637","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078181730","display_name":"Rajarshi Roy","orcid":"https://orcid.org/0000-0002-0202-1169"},"institutions":[{"id":"https://openalex.org/I77501641","display_name":"University of Kalyani","ror":"https://ror.org/03v783k16","country_code":"IN","type":"education","lineage":["https://openalex.org/I77501641"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Rajarshi Roy","raw_affiliation_strings":["Kalyani Government Engineering College,India"],"affiliations":[{"raw_affiliation_string":"Kalyani Government Engineering College,India","institution_ids":["https://openalex.org/I77501641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032131431","display_name":"Devleena Das","orcid":"https://orcid.org/0000-0002-7644-6303"},"institutions":[{"id":"https://openalex.org/I77501641","display_name":"University of Kalyani","ror":"https://ror.org/03v783k16","country_code":"IN","type":"education","lineage":["https://openalex.org/I77501641"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Devleena Das","raw_affiliation_strings":["Kalyani Government Engineering College,India"],"affiliations":[{"raw_affiliation_string":"Kalyani Government Engineering College,India","institution_ids":["https://openalex.org/I77501641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126583494","display_name":"Ankesh Banerjee","orcid":null},"institutions":[{"id":"https://openalex.org/I77501641","display_name":"University of Kalyani","ror":"https://ror.org/03v783k16","country_code":"IN","type":"education","lineage":["https://openalex.org/I77501641"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ankesh Banerjee","raw_affiliation_strings":["Kalyani Government Engineering College,India"],"affiliations":[{"raw_affiliation_string":"Kalyani Government Engineering College,India","institution_ids":["https://openalex.org/I77501641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038880035","display_name":"A. Bhattacharjee","orcid":null},"institutions":[{"id":"https://openalex.org/I77501641","display_name":"University of Kalyani","ror":"https://ror.org/03v783k16","country_code":"IN","type":"education","lineage":["https://openalex.org/I77501641"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Arjya Bhattacharjee","raw_affiliation_strings":["Kalyani Government Engineering College,India"],"affiliations":[{"raw_affiliation_string":"Kalyani Government Engineering College,India","institution_ids":["https://openalex.org/I77501641"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kousik Dasgupta","orcid":null},"institutions":[{"id":"https://openalex.org/I77501641","display_name":"University of Kalyani","ror":"https://ror.org/03v783k16","country_code":"IN","type":"education","lineage":["https://openalex.org/I77501641"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Kousik Dasgupta","raw_affiliation_strings":["Kalyani Government Engineering College,India"],"affiliations":[{"raw_affiliation_string":"Kalyani Government Engineering College,India","institution_ids":["https://openalex.org/I77501641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103091648","display_name":"Subarna Tripathi","orcid":"https://orcid.org/0000-0002-2757-4923"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Subarna Tripathi","raw_affiliation_strings":["Intel Labs,USA"],"affiliations":[{"raw_affiliation_string":"Intel Labs,USA","institution_ids":["https://openalex.org/I1343180700"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5078181730"],"corresponding_institution_ids":["https://openalex.org/I77501641"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.86549871,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6117","last_page":"6123"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3806000053882599,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3806000053882599,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.3075000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.1136000007390976,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5896999835968018},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5490000247955322},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.42750000953674316},{"id":"https://openalex.org/keywords/spatial-contextual-awareness","display_name":"Spatial contextual awareness","score":0.421099990606308},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.42010000348091125},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4066999852657318},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.37790000438690186},{"id":"https://openalex.org/keywords/keystroke-logging","display_name":"Keystroke logging","score":0.3294000029563904}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7567999958992004},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5896999835968018},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5817000269889832},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5490000247955322},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.527899980545044},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.43230000138282776},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.42750000953674316},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.421099990606308},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.42010000348091125},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4066999852657318},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4000999927520752},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.37790000438690186},{"id":"https://openalex.org/C161615301","wikidata":"https://www.wikidata.org/wiki/Q309396","display_name":"Keystroke logging","level":2,"score":0.3294000029563904},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.32100000977516174},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3147999942302704},{"id":"https://openalex.org/C71611378","wikidata":"https://www.wikidata.org/wiki/Q5165191","display_name":"Contextual design","level":3,"score":0.2978000044822693},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.27410000562667847},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.27219998836517334},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.25839999318122864}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccvw69036.2025.00637","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccvw69036.2025.00637","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1895577753","https://openalex.org/W2745461083","https://openalex.org/W2891374529","https://openalex.org/W2963518342","https://openalex.org/W3182910454","https://openalex.org/W4221167937","https://openalex.org/W4285606658","https://openalex.org/W4389523832","https://openalex.org/W4402669802","https://openalex.org/W4402727018","https://openalex.org/W4404782797","https://openalex.org/W4408265609","https://openalex.org/W4416036904"],"related_works":[],"abstract_inverted_index":{"We":[0],"introduce":[1],"a":[2,6,21,79,138],"training-free":[3,7],"framework,":[4],"ByDeWay,":[5],"method":[8,107],"to":[9,46,96],"boost":[10],"the":[11,30,57,98,132],"performances":[12],"of":[13,36,134],"Multimodal":[14,37],"Large":[15,38],"Language":[16,39],"Models.":[17],"Specifically,":[18],"ByDe-Way":[19],"leverages":[20],"novel":[22],"prompting":[23,136],"strategy,":[24],"Layered-Depth-Based":[25],"Prompting":[26],"(LDP),":[27],"that":[28],"enhances":[29],"spatial":[31,49],"reasoning":[32],"and":[33,70,74,103,111,121],"grounding":[34],"capabilities":[35],"Models":[40],"(MLLMs).":[41],"Our":[42,106],"key":[43],"insight":[44],"is":[45,108,144],"inject":[47],"structured":[48],"context":[50],"derived":[51],"from":[52],"monocular":[53],"depth":[54,72],"estimation":[55],"into":[56,67],"input":[58],"prompts-without":[59],"modifying":[60],"any":[61,114],"model":[62,99],"parameters.":[63],"By":[64],"segmenting":[65],"scenes":[66],"closest,":[68],"mid-range,":[69],"farthest":[71],"layers":[73],"generating":[75],"region-specific":[76],"captions":[77],"using":[78],"grounded":[80,102],"vision-language":[81],"model,":[82],"we":[83],"produce":[84],"explicit":[85],"depth-aware":[86,135],"textual":[87],"descriptions.":[88],"These":[89],"descriptions":[90],"are":[91],"concatenated":[92],"with":[93,113],"image-question":[94],"prompts":[95],"guide":[97],"toward":[100],"spatially":[101],"hallucination-resistant":[104],"outputs.":[105],"lightweight,":[109],"modular,":[110],"compatible":[112],"black-box":[115],"MLLM.":[116],"Evaluations":[117],"on":[118],"hallucination-sensitive":[119],"(POPE)":[120],"reasoning-intensive":[122],"(GQA)":[123],"tasks":[124],"show":[125],"consistent":[126],"improvements":[127],"across":[128],"multiple":[129],"MLLMs,":[130],"demonstrating":[131],"effectiveness":[133],"in":[137],"zero-training":[139],"setup.":[140],"The":[141],"source":[142],"code":[143],"available":[145],"at":[146],"https://github.com/Rajarshi12321/ByDeWay.":[147]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2026-02-24T00:00:00"}
