{"id":"https://openalex.org/W4403936798","doi":"https://doi.org/10.1109/tcsvt.2024.3488721","title":"Zero-Shot Object Counting With Vision-Language Prior Guidance Network","display_name":"Zero-Shot Object Counting With Vision-Language Prior Guidance Network","publication_year":2024,"publication_date":"2024-10-31","ids":{"openalex":"https://openalex.org/W4403936798","doi":"https://doi.org/10.1109/tcsvt.2024.3488721"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3488721","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3488721","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051542867","display_name":"Wenzhe Zhai","orcid":"https://orcid.org/0000-0003-0996-6832"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenzhe Zhai","raw_affiliation_strings":["College of Intelligent Systems Science and Engineering, Harbin Engineering University, Harbin, China"],"raw_orcid":"https://orcid.org/0000-0003-0996-6832","affiliations":[{"raw_affiliation_string":"College of Intelligent Systems Science and Engineering, Harbin Engineering University, Harbin, China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041111179","display_name":"Xianglei Xing","orcid":"https://orcid.org/0000-0002-4159-1922"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianglei Xing","raw_affiliation_strings":["College of Intelligent Systems Science and Engineering, Harbin Engineering University, Harbin, China"],"raw_orcid":"https://orcid.org/0000-0002-4159-1922","affiliations":[{"raw_affiliation_string":"College of Intelligent Systems Science and Engineering, Harbin Engineering University, Harbin, China","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026551711","display_name":"Mingliang Gao","orcid":"https://orcid.org/0000-0001-7273-7499"},"institutions":[{"id":"https://openalex.org/I119203015","display_name":"Shandong University of Technology","ror":"https://ror.org/02mr3ar13","country_code":"CN","type":"education","lineage":["https://openalex.org/I119203015"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingliang Gao","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Shandong University of Technology, Zibo, China"],"raw_orcid":"https://orcid.org/0000-0001-7273-7499","affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Shandong University of Technology, Zibo, China","institution_ids":["https://openalex.org/I119203015"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041574741","display_name":"Qilei Li","orcid":"https://orcid.org/0000-0002-9675-9016"},"institutions":[{"id":"https://openalex.org/I119203015","display_name":"Shandong University of Technology","ror":"https://ror.org/02mr3ar13","country_code":"CN","type":"education","lineage":["https://openalex.org/I119203015"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qilei Li","raw_affiliation_strings":["School of Electrical and Electronic Engineering, Shandong University of Technology, Zibo, China"],"raw_orcid":"https://orcid.org/0000-0002-9675-9016","affiliations":[{"raw_affiliation_string":"School of Electrical and Electronic Engineering, Shandong University of Technology, Zibo, China","institution_ids":["https://openalex.org/I119203015"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":6.3131,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.96907717,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"35","issue":"3","first_page":"2487","last_page":"2498"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9781000018119812,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.631232500076294},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.622606098651886},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6122856736183167},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.4938984513282776},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.48334118723869324},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4102725684642792},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.27678942680358887}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.631232500076294},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.622606098651886},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6122856736183167},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.4938984513282776},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.48334118723869324},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4102725684642792},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27678942680358887},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3488721","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3488721","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4847654746","display_name":null,"funder_award_id":"No. 62076078","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5531844970","display_name":null,"funder_award_id":"CAAIXSJLJJ-2020-033A","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G808462669","display_name":null,"funder_award_id":"62076078","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1181052087","https://openalex.org/W2463631526","https://openalex.org/W2955058313","https://openalex.org/W2962832028","https://openalex.org/W2962921175","https://openalex.org/W2968848584","https://openalex.org/W3047841165","https://openalex.org/W3096609285","https://openalex.org/W3120979957","https://openalex.org/W3159481202","https://openalex.org/W3170764266","https://openalex.org/W3177211876","https://openalex.org/W4206760693","https://openalex.org/W4207050521","https://openalex.org/W4221148940","https://openalex.org/W4221160666","https://openalex.org/W4281743493","https://openalex.org/W4283312418","https://openalex.org/W4289752563","https://openalex.org/W4293818652","https://openalex.org/W4296739197","https://openalex.org/W4312458877","https://openalex.org/W4319866161","https://openalex.org/W4322489886","https://openalex.org/W4322747315","https://openalex.org/W4366378414","https://openalex.org/W4378194725","https://openalex.org/W4379659754","https://openalex.org/W4382998741","https://openalex.org/W4385245566","https://openalex.org/W4385252090","https://openalex.org/W4385775296","https://openalex.org/W4386065414","https://openalex.org/W4386076390","https://openalex.org/W4390873375","https://openalex.org/W4393171259","https://openalex.org/W4394625746","https://openalex.org/W4394984748","https://openalex.org/W4399769564","https://openalex.org/W4404612908","https://openalex.org/W6791353385","https://openalex.org/W6809665764","https://openalex.org/W6811230874","https://openalex.org/W6838322790","https://openalex.org/W6849177959"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"The":[0,20,137,159],"majority":[1],"of":[2,22,43,85,140,167,200,217,255,277],"existing":[3],"counting":[4,42,58],"models":[5,59,99,172],"are":[6,261],"designed":[7],"to":[8,132,173,192,209,222],"operate":[9],"on":[10,53,61,196,271],"a":[11,49,122,183],"singular":[12],"object":[13,170,241],"category,":[14],"such":[15],"as":[16,95,178,245],"crowds":[17],"or":[18],"vehicles.":[19],"emergence":[21],"multi-modal":[23],"foundational":[24],"models,":[25],"e.g.,":[26],"Contrastive":[27],"Language-Image":[28],"Pre-training":[29],"(CLIP),":[30],"has":[31],"paved":[32],"the":[33,41,68,82,90,113,126,145,164,175,190,197,201,204,211,218,224,252,275,278],"way":[34],"for":[35,182,248],"class-agnostic":[36,57],"counting.":[37],"This":[38,187],"approach":[39],"facilitates":[40],"objects":[44],"across":[45,227],"diverse":[46],"classes":[47],"within":[48,112],"single":[50],"image":[51],"based":[52,60],"textual":[54,237],"indications.":[55],"However,":[56],"CLIP":[62,69,91],"confront":[63],"two":[64,135],"primary":[65],"challenges.":[66,136],"Firstly,":[67],"model":[70,92],"exhibits":[71],"limited":[72],"sensitivity":[73],"towards":[74],"location":[75,199],"information,":[76],"which":[77,243],"prioritizes":[78],"global":[79],"content":[80],"over":[81],"precise":[83],"localization":[84],"objects.":[86,202],"Therefore,":[87],"directly":[88],"employing":[89],"is":[93,207],"regarded":[94],"suboptimal.":[96],"Secondly,":[97],"these":[98,134,256],"commonly":[100],"employ":[101],"frozen":[102],"pre-trained":[103,169],"vision":[104],"and":[105,154,214],"language":[106],"encoders":[107],"while":[108,263],"disregarding":[109],"potential":[110],"misalignment":[111],"constructed":[114],"hypothesis":[115],"space.":[116],"In":[117],"this":[118],"paper,":[119],"we":[120,230],"propose":[121],"unified":[123],"framework,":[124],"named":[125],"Vision-Language":[127],"Prior":[128,150],"Guidance":[129],"(VLPG)":[130],"Network,":[131],"tackle":[133],"VLPG":[138],"consists":[139],"three":[141,257],"key":[142],"components,":[143],"namely":[144],"Grounding":[146,160],"DINO":[147,161],"module,":[148,153],"Spatial":[149],"Calibration":[151],"(SPC)":[152],"Object-Centric":[155],"Alignment":[156],"(OCA)":[157],"module.":[158],"module":[162,206,234],"utilizes":[163],"spatial-awareness":[165],"capability":[166],"extensive":[168],"grounding":[171],"incorporate":[174],"spatial":[176,219],"position":[177],"an":[179,232,240,246],"additional":[180],"prior":[181],"particular":[184],"query":[185,242],"class.":[186],"adaptation":[188],"enables":[189],"network":[191],"concentrate":[193],"more":[194],"precisely":[195],"exact":[198],"Meanwhile,":[203],"SPC":[205],"built":[208],"extract":[210],"long-range":[212],"dependencies":[213],"local":[215],"regions":[216],"position.":[220],"Additionally,":[221],"align":[223],"feature":[225],"space":[226],"different":[228],"modalities,":[229],"design":[231],"OCA":[233],"that":[235],"condenses":[236],"information":[238],"into":[239],"serves":[244],"instruction":[247],"cross-modality":[249],"matching.":[250],"Through":[251],"collaborative":[253],"efforts":[254],"modules,":[258],"multimodal":[259],"representations":[260],"aligned":[262],"maintaining":[264],"their":[265],"discriminative":[266],"nature.":[267],"Comprehensive":[268],"experiments":[269],"conducted":[270],"various":[272],"benchmarks":[273],"validate":[274],"effectiveness":[276],"proposed":[279],"model.":[280]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":15}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
