{"id":"https://openalex.org/W7129035029","doi":"https://doi.org/10.48550/arxiv.2602.12774","title":"Bootstrapping MLLM for Weakly-Supervised Class-Agnostic Object Counting","display_name":"Bootstrapping MLLM for Weakly-Supervised Class-Agnostic Object Counting","publication_year":2026,"publication_date":"2026-02-13","ids":{"openalex":"https://openalex.org/W7129035029","doi":"https://doi.org/10.48550/arxiv.2602.12774"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.12774","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126114844","display_name":"Xiaowen Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Xiaowen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126133617","display_name":"Zijie Yue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yue, Zijie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126079544","display_name":"Yong Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Yong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126170009","display_name":"Cairong Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Cairong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101903372","display_name":"Qijun Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Qijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126099468","display_name":"Miaojing Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Miaojing","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5126114844"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.3709999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.3709999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12697","display_name":"Water Quality Monitoring Technologies","score":0.06509999930858612,"subfield":{"id":"https://openalex.org/subfields/2312","display_name":"Water Science and Technology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12597","display_name":"Fire Detection and Safety Systems","score":0.054499998688697815,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7041000127792358},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6809999942779541},{"id":"https://openalex.org/keywords/bootstrapping","display_name":"Bootstrapping (finance)","score":0.5289000272750854},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4602999985218048},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.4562000036239624},{"id":"https://openalex.org/keywords/counting-problem","display_name":"Counting problem","score":0.41449999809265137},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.40799999237060547},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.40230000019073486}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7294999957084656},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7041000127792358},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6809999942779541},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5710999965667725},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.5289000272750854},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4602999985218048},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.4562000036239624},{"id":"https://openalex.org/C16592021","wikidata":"https://www.wikidata.org/wiki/Q5177154","display_name":"Counting problem","level":2,"score":0.41449999809265137},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.40799999237060547},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.40230000019073486},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.36010000109672546},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34060001373291016},{"id":"https://openalex.org/C3019973339","wikidata":"https://www.wikidata.org/wiki/Q899523","display_name":"Object based","level":3,"score":0.32580000162124634},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3075999915599823},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.30070000886917114},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2842999994754791},{"id":"https://openalex.org/C2781104640","wikidata":"https://www.wikidata.org/wiki/Q11827313","display_name":"Counting process","level":2,"score":0.28360000252723694},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2782000005245209},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.26600000262260437},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2644999921321869},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.25999999046325684}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.12774","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.12774","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.12774","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.12774","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Object":[0],"counting":[1,17,46,95,162,175],"is":[2,108,140,205],"a":[3,47,103,122,135,160],"fundamental":[4],"task":[5],"in":[6,12,97,177],"computer":[7],"vision,":[8],"with":[9],"broad":[10],"applicability":[11],"many":[13,195],"real-world":[14],"scenarios.":[15],"Fully-supervised":[16],"methods":[18,27,198],"require":[19],"costly":[20],"point-level":[21],"annotations":[22],"per":[23],"object.":[24],"Few":[25],"weakly-supervised":[26,61],"leverage":[28],"only":[29],"image-level":[30],"object":[31,65,74,118,157],"counts":[32],"as":[33],"supervision":[34],"and":[35,100,125,166,169,186],"achieve":[36],"fairly":[37],"promising":[38],"results.":[39],"They":[40],"are,":[41],"however,":[42],"often":[43],"limited":[44],"to":[45,72,81,92,110,114,142,146,155,173],"single":[48],"category,":[49],"e.g.":[50],"person.":[51],"In":[52],"this":[53],"paper,":[54],"we":[55,85],"propose":[56],"WS-COC,":[57],"the":[58,82,94,112,117,129,144,148],"first":[59],"MLLM-driven":[60],"framework":[62],"for":[63],"class-agnostic":[64],"counting.":[66],"Instead":[67],"of":[68,151],"directly":[69],"fine-tuning":[70],"MLLMs":[71],"predict":[73],"counts,":[75],"which":[76],"can":[77],"be":[78],"challenging":[79],"due":[80],"modality":[83],"gap,":[84],"incorporate":[86],"three":[87],"simple":[88],"yet":[89],"effective":[90],"strategies":[91],"bootstrap":[93],"paradigm":[96],"both":[98],"training":[99],"testing:":[101],"First,":[102],"divide-and-discern":[104],"dialogue":[105],"tuning":[106],"strategy":[107,139,164],"proposed":[109],"guide":[111],"MLLM":[113,145],"determine":[115],"whether":[116],"count":[119,137,171],"falls":[120],"within":[121],"specific":[123],"range":[124,130],"progressively":[126],"break":[127],"down":[128],"through":[131],"multi-round":[132],"dialogue.":[133],"Second,":[134],"compare-and-rank":[136],"optimization":[138],"introduced":[141],"train":[143],"optimize":[147],"relative":[149],"ranking":[150],"multiple":[152],"images":[153],"according":[154],"their":[156],"counts.":[158],"Third,":[159],"global-and-local":[161],"enhancement":[163],"aggregates":[165],"fuses":[167],"local":[168],"global":[170],"predictions":[172],"improve":[174],"performance":[176],"dense":[178],"scenes.":[179],"Extensive":[180],"experiments":[181],"on":[182],"FSC-147,":[183],"CARPK,":[184],"PUCPR+,":[185],"ShanghaiTech":[187],"show":[188],"that":[189],"WS-COC":[190],"matches":[191],"or":[192],"even":[193],"surpasses":[194],"state-of-art":[196],"fully-supervised":[197],"while":[199],"significantly":[200],"reducing":[201],"annotation":[202],"costs.":[203],"Code":[204],"available":[206],"at":[207],"https://github.com/viscom-tongji/WS-COC.":[208]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-17T00:00:00"}
