{"id":"https://openalex.org/W4392970117","doi":"https://doi.org/10.48550/arxiv.2403.11468","title":"CollagePrompt: A Benchmark for Budget-Friendly Visual Recognition with GPT-4V","display_name":"CollagePrompt: A Benchmark for Budget-Friendly Visual Recognition with GPT-4V","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392970117","doi":"https://doi.org/10.48550/arxiv.2403.11468"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2403.11468","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.11468","pdf_url":"https://arxiv.org/pdf/2403.11468","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.11468","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102078048","display_name":"Siyu Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Siyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065882425","display_name":"Yunke Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yunke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094196062","display_name":"Daochang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Daochang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101065618","display_name":"Chang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Xu, Chang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Chang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102078048"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11801","display_name":"Reservoir Engineering and Simulation Methods","score":0.5618000030517578,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11801","display_name":"Reservoir Engineering and Simulation Methods","score":0.5618000030517578,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14319","display_name":"Currency Recognition and Detection","score":0.5123000144958496,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.5024999976158142,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/environmentally-friendly","display_name":"Environmentally friendly","score":0.4708311855792999},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.4595460295677185},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.35537606477737427},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.12061360478401184},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.06221780180931091}],"concepts":[{"id":"https://openalex.org/C171534860","wikidata":"https://www.wikidata.org/wiki/Q655870","display_name":"Environmentally friendly","level":2,"score":0.4708311855792999},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.4595460295677185},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.35537606477737427},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.12061360478401184},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.06221780180931091}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2403.11468","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.11468","pdf_url":"https://arxiv.org/pdf/2403.11468","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2403.11468","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2403.11468","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2403.11468","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.11468","pdf_url":"https://arxiv.org/pdf/2403.11468","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320312169","display_name":"National Computational Infrastructure","ror":"https://ror.org/04yx6dh41"},{"id":"https://openalex.org/F4320337351","display_name":"National Cancer Institute","ror":"https://ror.org/040gcmg81"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392970117.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W1577087430","https://openalex.org/W2368072971","https://openalex.org/W186863808","https://openalex.org/W2348242304","https://openalex.org/W3173132437","https://openalex.org/W2352189277","https://openalex.org/W2351585432","https://openalex.org/W2377270209","https://openalex.org/W2383783507"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,17,86,103,205],"generative":[3],"AI":[4],"have":[5],"suggested":[6],"that":[7,51],"by":[8],"taking":[9],"visual":[10,18,58,64,88,117,160,206],"prompts,":[11,199],"GPT-4V":[12,62,202],"can":[13],"demonstrate":[14],"significant":[15],"proficiency":[16],"recognition":[19,65,118,161],"tasks.":[20],"Despite":[21],"its":[22,37,84],"impressive":[23],"capabilities,":[24],"the":[25,72,104,110,131,184,187],"financial":[26],"cost":[27],"associated":[28],"with":[29,100,162],"GPT-4V's":[30,87],"inference":[31],"presents":[32],"a":[33,46,56,76,144,150],"substantial":[34],"barrier":[35],"to":[36,82,115,156,171,182,195],"wide":[38],"use.":[39],"To":[40,139],"address":[41],"this":[42,140,215],"challenge,":[43],"we":[44,142],"propose":[45],"budget-friendly":[47],"collage":[48,80,137,154,173,189,198],"prompting":[49],"task":[50],"collages":[52],"multiple":[53],"images":[54,68,108],"into":[55],"single":[57],"prompt":[59,155],"and":[60,177,210],"makes":[61],"perform":[63],"on":[66],"several":[67,93],"simultaneously,":[69],"thereby":[70],"reducing":[71],"cost.":[73],"We":[74],"collect":[75],"dataset":[77],"of":[78,109,133,186],"various":[79],"prompts":[81],"assess":[83],"performance":[85],"recognition.":[89,207],"Our":[90,191],"evaluations":[91],"reveal":[92],"key":[94],"findings:":[95],"1)":[96],"Recognition":[97],"accuracy":[98],"varies":[99],"different":[101],"positions":[102],"collage.":[105],"2)":[106],"Grouping":[107],"same":[111],"category":[112],"together":[113],"leads":[114],"better":[116,196],"results.":[119],"3)":[120],"Incorrect":[121],"labels":[122],"often":[123],"come":[124],"from":[125,168],"adjacent":[126],"images.":[127],"These":[128],"findings":[129],"highlight":[130],"importance":[132],"image":[134],"arrangement":[135],"within":[136],"prompt.":[138,190],"end,":[141],"construct":[143],"benchmark":[145,192],"called":[146],"CollagePrompt,":[147],"which":[148],"offers":[149],"platform":[151],"for":[152],"designing":[153],"achieve":[157],"more":[158,203],"cost-effective":[159,204],"GPT-4V.":[163],"A":[164],"baseline":[165],"method":[166],"derived":[167],"genetic":[169],"algorithms":[170],"optimize":[172,197],"layouts":[174],"is":[175],"proposed":[176],"two":[178],"metrics":[179],"are":[180,212],"introduced":[181],"measure":[183],"efficiency":[185],"optimized":[188],"enables":[193],"researchers":[194],"thus":[200],"making":[201],"The":[208],"code":[209],"data":[211],"available":[213],"at":[214],"project":[216],"page":[217],"https://collageprompting.github.io/.":[218]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2024-03-20T00:00:00"}
