{"id":"https://openalex.org/W4415708752","doi":"https://doi.org/10.1109/icme59968.2025.11209839","title":"CLIP-EBC: CLIP Can Count Accurately through Enhanced Blockwise Classification","display_name":"CLIP-EBC: CLIP Can Count Accurately through Enhanced Blockwise Classification","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415708752","doi":"https://doi.org/10.1109/icme59968.2025.11209839"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209839","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209839","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101569452","display_name":"Yiming Ma","orcid":"https://orcid.org/0000-0001-8893-6492"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Yiming Ma","raw_affiliation_strings":["University of Warwick,Department of Computer Science,Coventry,United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Warwick,Department of Computer Science,Coventry,United Kingdom","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100634970","display_name":"V\u00edctor F. V\u00e1squez S\u00e1nchez","orcid":"https://orcid.org/0000-0003-4777-9237"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Victor Sanchez","raw_affiliation_strings":["University of Warwick,Department of Computer Science,Coventry,United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Warwick,Department of Computer Science,Coventry,United Kingdom","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021354054","display_name":"Tanaya Guha","orcid":"https://orcid.org/0000-0003-2167-4891"},"institutions":[{"id":"https://openalex.org/I7882870","display_name":"University of Glasgow","ror":"https://ror.org/00vtgdb53","country_code":"GB","type":"education","lineage":["https://openalex.org/I7882870"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tanaya Guha","raw_affiliation_strings":["University of Glasgow,School of Computing Science,Glasgow,United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Glasgow,School of Computing Science,Glasgow,United Kingdom","institution_ids":["https://openalex.org/I7882870"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101569452"],"corresponding_institution_ids":["https://openalex.org/I39555362"],"apc_list":null,"apc_paid":null,"fwci":3.8279,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.94279356,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.6287999749183655,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.6287999749183655,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.19059999287128448,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.019500000402331352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.6035000085830688},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5618000030517578},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.49470001459121704},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.47440001368522644},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.40450000762939453},{"id":"https://openalex.org/keywords/mean-absolute-error","display_name":"Mean absolute error","score":0.35679998993873596}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7253000140190125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6280999779701233},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.6035000085830688},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5618000030517578},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.49470001459121704},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.47440001368522644},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.40450000762939453},{"id":"https://openalex.org/C188154048","wikidata":"https://www.wikidata.org/wiki/Q6803609","display_name":"Mean absolute error","level":3,"score":0.35679998993873596},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.353300005197525},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.34459999203681946},{"id":"https://openalex.org/C156273044","wikidata":"https://www.wikidata.org/wiki/Q4913766","display_name":"Bin","level":2,"score":0.3221000134944916},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.30239999294281006},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2734000086784363},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.263700008392334},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25209999084472656},{"id":"https://openalex.org/C199833920","wikidata":"https://www.wikidata.org/wiki/Q612536","display_name":"Vector quantization","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209839","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209839","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2463631526","https://openalex.org/W2886443245","https://openalex.org/W2963163009","https://openalex.org/W2963446712","https://openalex.org/W2964209782","https://openalex.org/W2976549228","https://openalex.org/W2996703886","https://openalex.org/W3027606690","https://openalex.org/W3186674029","https://openalex.org/W3203845557","https://openalex.org/W4293363567","https://openalex.org/W4307203027","https://openalex.org/W4312420092","https://openalex.org/W4312613051","https://openalex.org/W4312651322","https://openalex.org/W4385775296","https://openalex.org/W4386065414","https://openalex.org/W4386065419","https://openalex.org/W4390873023","https://openalex.org/W4390874814"],"related_works":[],"abstract_inverted_index":{"We":[0],"propose":[1,61],"CLIP-EBC,":[2],"the":[3,62,114,123,133],"first":[4],"fully":[5,89],"CLIP-based":[6],"model":[7],"designed":[8],"for":[9,94],"accurate":[10],"crowd":[11,16,95],"density":[12],"estimation.":[13],"Existing":[14],"classification-based":[15,102],"counting":[17],"frameworks":[18],"face":[19],"challenges":[20],"when":[21],"directly":[22],"applied":[23],"with":[24,40,127],"CLIP.":[25],"For":[26],"instance,":[27],"these":[28,58],"methods":[29,103],"quantize":[30],"count":[31,81],"values":[32],"into":[33],"bordering":[34],"real-valued":[35],"bins,":[36],"which":[37,68],"are":[38],"inconsistent":[39],"CLIP\u2019s":[41,91],"pretraining":[42],"corpus.":[43],"Besides,":[44],"this":[45,85],"quantization":[46],"strategy":[47],"also":[48],"introduces":[49],"label":[50],"ambiguity":[51],"near":[52],"shared":[53],"bin":[54],"borders.":[55],"To":[56],"address":[57],"issues,":[59],"we":[60],"Enhanced":[63],"Blockwise":[64],"Classification":[65],"(EBC)":[66],"framework,":[67,87],"utilizes":[69],"integer-valued":[70],"bins":[71],"and":[72],"additionally":[73],"incorporates":[74],"a":[75],"density-map-based":[76],"loss":[77],"to":[78,106],"further":[79],"improve":[80],"accuracy.":[82],"Building":[83],"on":[84,113,122],"backbone-agnostic":[86],"CLIP-EBC":[88,118],"leverages":[90],"recognition":[92],"capabilities":[93],"counting.":[96],"Experiments":[97],"show":[98],"that":[99],"EBC":[100],"improves":[101],"by":[104,137],"up":[105],"44.5%":[107],"in":[108],"mean":[109],"absolute":[110],"error":[111],"(MAE)":[112],"UCF-QNRF":[115],"dataset.":[116],"Furthermore,":[117],"achieves":[119],"state-of-the-art":[120],"performance":[121],"NWPU-Crowd":[124],"test":[125],"set,":[126],"an":[128],"MAE":[129],"of":[130],"58.2,":[131],"surpassing":[132],"previous":[134],"best":[135],"method":[136],"8.6%.":[138],"Our":[139],"code":[140],"is":[141],"available":[142],"at":[143],"https://github.com/Yiming-M/CLIP-EBC.":[144]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-30T08:08:38.191290","created_date":"2025-10-10T00:00:00"}
