{"id":"https://openalex.org/W7117450976","doi":"https://doi.org/10.1109/dicta68720.2025.11302471","title":"Can Current AI Models Count What We Mean, Not What They See? A Benchmark and Systematic Evaluation","display_name":"Can Current AI Models Count What We Mean, Not What They See? A Benchmark and Systematic Evaluation","publication_year":2025,"publication_date":"2025-12-03","ids":{"openalex":"https://openalex.org/W7117450976","doi":"https://doi.org/10.1109/dicta68720.2025.11302471"},"language":null,"primary_location":{"id":"doi:10.1109/dicta68720.2025.11302471","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dicta68720.2025.11302471","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121448001","display_name":"Gia Khanh Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I170239107","display_name":"University of South Australia","ror":"https://ror.org/01p93h210","country_code":"AU","type":"education","lineage":["https://openalex.org/I170239107"]},{"id":"https://openalex.org/I2802355861","display_name":"Australian Institute of Business","ror":"https://ror.org/01wf5jq34","country_code":"AU","type":"education","lineage":["https://openalex.org/I2802355861"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Gia Khanh Nguyen","raw_affiliation_strings":["Australian Institute for Machine Learning, University of Adelaide,SA,Australia"],"affiliations":[{"raw_affiliation_string":"Australian Institute for Machine Learning, University of Adelaide,SA,Australia","institution_ids":["https://openalex.org/I170239107","https://openalex.org/I2802355861"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030008088","display_name":"Yifeng Huang","orcid":"https://orcid.org/0009-0003-9027-5963"},"institutions":[{"id":"https://openalex.org/I59553526","display_name":"Stony Brook University","ror":"https://ror.org/05qghxh33","country_code":"US","type":"education","lineage":["https://openalex.org/I59553526"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yifeng Huang","raw_affiliation_strings":["Stony Brook University,Stony Brook,NY,USA"],"affiliations":[{"raw_affiliation_string":"Stony Brook University,Stony Brook,NY,USA","institution_ids":["https://openalex.org/I59553526"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038957718","display_name":"Minh Hoai","orcid":null},"institutions":[{"id":"https://openalex.org/I170239107","display_name":"University of South Australia","ror":"https://ror.org/01p93h210","country_code":"AU","type":"education","lineage":["https://openalex.org/I170239107"]},{"id":"https://openalex.org/I2802355861","display_name":"Australian Institute of Business","ror":"https://ror.org/01wf5jq34","country_code":"AU","type":"education","lineage":["https://openalex.org/I2802355861"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Minh Hoai","raw_affiliation_strings":["Australian Institute for Machine Learning, University of Adelaide,SA,Australia"],"affiliations":[{"raw_affiliation_string":"Australian Institute for Machine Learning, University of Adelaide,SA,Australia","institution_ids":["https://openalex.org/I170239107","https://openalex.org/I2802355861"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5121448001"],"corresponding_institution_ids":["https://openalex.org/I170239107","https://openalex.org/I2802355861"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.66570255,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.09149999916553497,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.09149999916553497,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12874","display_name":"Digital Imaging for Blood Diseases","score":0.08659999817609787,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.05620000138878822,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.9128999710083008},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5274999737739563},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4814999997615814},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4657999873161316},{"id":"https://openalex.org/keywords/counting-process","display_name":"Counting process","score":0.34209999442100525},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.33640000224113464}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.9128999710083008},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7269999980926514},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.550000011920929},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5274999737739563},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5227000117301941},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4814999997615814},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4657999873161316},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40689998865127563},{"id":"https://openalex.org/C2781104640","wikidata":"https://www.wikidata.org/wiki/Q11827313","display_name":"Counting process","level":2,"score":0.34209999442100525},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.33640000224113464},{"id":"https://openalex.org/C148043351","wikidata":"https://www.wikidata.org/wiki/Q4456944","display_name":"Current (fluid)","level":2,"score":0.3181000053882599},{"id":"https://openalex.org/C33643355","wikidata":"https://www.wikidata.org/wiki/Q5176731","display_name":"Count data","level":3,"score":0.30709999799728394},{"id":"https://openalex.org/C168820333","wikidata":"https://www.wikidata.org/wiki/Q448889","display_name":"Visual inspection","level":2,"score":0.30660000443458557},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.25360000133514404},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dicta68720.2025.11302471","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dicta68720.2025.11302471","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1203557841","https://openalex.org/W2347064614","https://openalex.org/W2463631526","https://openalex.org/W2883929025","https://openalex.org/W2884960332","https://openalex.org/W2948513880","https://openalex.org/W2949333977","https://openalex.org/W2962921175","https://openalex.org/W2963838390","https://openalex.org/W2964209782","https://openalex.org/W2987761108","https://openalex.org/W3007175960","https://openalex.org/W3015469128","https://openalex.org/W3034785991","https://openalex.org/W3035193053","https://openalex.org/W3035925134","https://openalex.org/W3109157205","https://openalex.org/W3109272290","https://openalex.org/W3110563940","https://openalex.org/W3132424739","https://openalex.org/W3170764266","https://openalex.org/W3175725565","https://openalex.org/W3203845557","https://openalex.org/W4221148940","https://openalex.org/W4221160666","https://openalex.org/W4312423884","https://openalex.org/W4322747315","https://openalex.org/W4376272476","https://openalex.org/W4385775296","https://openalex.org/W4390873321","https://openalex.org/W4390873375","https://openalex.org/W4390889798","https://openalex.org/W4393153748","https://openalex.org/W4393154745","https://openalex.org/W4393158196","https://openalex.org/W4402667892","https://openalex.org/W4402703087","https://openalex.org/W4403878713","https://openalex.org/W4404612908","https://openalex.org/W4409262158","https://openalex.org/W4409363673","https://openalex.org/W4413144929","https://openalex.org/W7103755574"],"related_works":[],"abstract_inverted_index":{"Visual":[0],"counting":[1,27,37,45,113,164],"is":[2],"a":[3,16,54,117,155],"fundamental":[4],"yet":[5],"challenging":[6],"task,":[7],"especially":[8,146],"when":[9],"users":[10,144],"need":[11],"to":[12,41,59,78,140],"count":[13,81,142],"objects":[14],"of":[15,65,111,119],"specific":[17],"type":[18],"in":[19,36,70,86,147],"complex":[20],"scenes.":[21],"While":[22],"recent":[23,135],"models,":[24,121,126],"including":[25,122],"class-agnostic":[26],"models":[28,32,77,138],"and":[29,80,99,127,149,160,168],"large":[30,128],"vision-language":[31],"(VLMs),":[33],"show":[34,132],"promise":[35],"tasks,":[38],"their":[39],"ability":[40],"perform":[42],"fine-grained,":[43],"intent-driven":[44],"remains":[46],"unclear.":[47],"In":[48],"this":[49,171],"paper,":[50],"we":[51],"introduce":[52],"Pair-Tally,":[53],"benchmark":[55,116,172],"dataset":[56,93],"specifically":[57],"designed":[58],"evaluate":[60],"fine-grained":[61,148,162],"visual":[62,163],"counting.":[63],"Each":[64],"the":[66],"681":[67],"high-resolution":[68],"images":[69],"PairTally":[71,153],"contains":[72],"two":[73],"object":[74],"categories,":[75],"requiring":[76],"distinguish":[79],"based":[82],"on":[83],"subtle":[84],"differences":[85],"shape,":[87],"size,":[88],"color,":[89],"or":[90],"semantics.":[91],"The":[92,166],"includes":[94],"both":[95],"intercategory":[96],"(distinct":[97],"categories)":[98],"intra-category":[100],"(closely":[101],"related":[102],"subcategories)":[103],"settings,":[104],"making":[105],"it":[106],"suitable":[107],"for":[108,158,170],"rigorous":[109],"evaluation":[110],"selective":[112],"capabilities.":[114],"We":[115],"variety":[118],"state-of-the-art":[120],"exemplar-based":[123],"methods,":[124],"language-prompted":[125],"VLMs.":[129],"Our":[130],"results":[131],"that":[133],"despite":[134],"advances,":[136],"current":[137],"struggle":[139],"reliably":[141],"what":[143],"intend,":[145],"visually":[150],"ambiguous":[151],"cases.":[152],"provides":[154],"new":[156],"foundation":[157],"diagnosing":[159],"improving":[161],"systems.":[165],"data":[167],"code":[169],"are":[173],"available":[174],"at":[175],"https://github.com/bbvisual/PairTally_Benchmark.":[176]},"counts_by_year":[],"updated_date":"2026-02-23T20:09:44.859080","created_date":"2025-12-29T00:00:00"}
