{"id":"https://openalex.org/W7161681669","doi":"https://doi.org/10.48550/arxiv.2605.18700","title":"A Large-Scale Study on the Accuracy vs Cost Trade-offs of Training and Evaluation Settings in Fine-Grained Image Recognition","display_name":"A Large-Scale Study on the Accuracy vs Cost Trade-offs of Training and Evaluation Settings in Fine-Grained Image Recognition","publication_year":2026,"publication_date":"2026-05-18","ids":{"openalex":"https://openalex.org/W7161681669","doi":"https://doi.org/10.48550/arxiv.2605.18700"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.18700","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18700","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.18700","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087804607","display_name":"Edwin Arkel Rios","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rios, Edwin Arkel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136494584","display_name":"Augusto Christian Surya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Surya, Augusto Christian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119983012","display_name":"Oswin Gosal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gosal, Oswin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119983011","display_name":"Fernando Mikael","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mikael, Fernando","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136475764","display_name":"Mary Madeline Nicole","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nicole, Mary Madeline","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136456881","display_name":"Kisoon Jang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jang, Kisoon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136370344","display_name":"Bo-Cheng Lai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lai, Bo-Cheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136471742","display_name":"Min-Chun Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Min-Chun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.47589999437332153,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.47589999437332153,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.14409999549388885,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.08659999817609787,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8679999709129333},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7059000134468079},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6478000283241272},{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.6247000098228455},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4618000090122223},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.4471000134944916},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.44209998846054077},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.41769999265670776}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8679999709129333},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7059000134468079},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6858999729156494},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6478000283241272},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.6247000098228455},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5792999863624573},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5412999987602234},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4618000090122223},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.4471000134944916},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.44209998846054077},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.41769999265670776},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.40459999442100525},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3928000032901764},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.39169999957084656},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3758000135421753},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3472999930381775},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2906000018119812},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.28949999809265137},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.28850001096725464},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.18700","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18700","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.18700","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18700","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7511450052261353,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Prior":[0],"work":[1,29],"on":[2,53,74,106],"fine-grained":[3,60],"image":[4],"recognition":[5],"(FGIR)":[6],"has":[7,16],"established":[8],"the":[9,12,18,54,103],"importance":[10],"of":[11,56],"backbone":[13],"selection,":[14],"but":[15],"neglected":[17],"accuracy-vs-cost":[19],"trade-offs":[20],"under":[21],"different":[22],"training":[23,41,61,126],"and":[24,42,48,77,115,151],"evaluation":[25,43],"settings.":[26],"In":[27],"this":[28],"we":[30,147],"conduct":[31],"a":[32,70,130],"large-scale":[33],"study":[34],"with":[35,80],"over":[36],"2000":[37],"experiments":[38],"across":[39],"6":[40],"settings,":[44],"9":[45],"pretrained":[46],"backbones,":[47],"17":[49],"datasets.":[50],"Preliminary":[51],"observations":[52],"effectiveness":[55],"data":[57],"augmentation":[58],"for":[59],"motivate":[62],"us":[63],"to":[64,132],"extend":[65],"Counterfactual":[66],"Attention":[67],"Learning":[68],"(CAL),":[69],"state-of-the-art":[71],"method":[72],"based":[73],"data-aware":[75,123],"cropping":[76],"masking":[78],"augmentations,":[79],"cross-image":[81],"discriminative":[82,107],"region":[83],"mixing":[84],"augmentation.":[85],"We":[86],"also":[87],"propose":[88],"an":[89],"efficient":[90],"evaluation-only":[91],"variant":[92],"that":[93,109,122],"maintains":[94],"competitive":[95],"accuracy":[96,135],"while":[97],"reducing":[98,140],"inference":[99,141],"costs":[100],"by":[101,113],"forfeiting":[102],"forward":[104],"pass":[105],"crops":[108],"is":[110],"normally":[111],"used":[112],"CAL":[114],"similar":[116],"FGIR":[117],"methods.":[118],"Our":[119],"results":[120],"show":[121],"augmentations":[124],"during":[125],"only":[127],"can":[128],"enable":[129],"model":[131],"achieve":[133],"excellent":[134],"even":[136],"without":[137],"crops,":[138],"significantly":[139],"costs.":[142],"To":[143],"support":[144],"future":[145],"research":[146],"share":[148],"our":[149],"code":[150],"checkpoints":[152],"at:":[153],"\\url{https://github.com/arkel23/FGIR-Backbones}":[154]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
