{"id":"https://openalex.org/W3207088217","doi":"https://doi.org/10.1145/3474085.3475668","title":"Visual Language Based Succinct Zero-Shot Object Detection","display_name":"Visual Language Based Succinct Zero-Shot Object Detection","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3207088217","doi":"https://doi.org/10.1145/3474085.3475668","mag":"3207088217"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475668","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010057680","display_name":"Ye Zheng","orcid":"https://orcid.org/0000-0003-1618-6834"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ye Zheng","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences &amp; University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101951180","display_name":"Xi Huang","orcid":"https://orcid.org/0000-0003-1953-5809"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xi Huang","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100360616","display_name":"Li Cui","orcid":"https://orcid.org/0000-0002-4125-2138"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Cui","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5010057680"],"corresponding_institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.1921,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.49911765,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"5410","last_page":"5418"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7982629537582397},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6919313073158264},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.6130453944206238},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5810927748680115},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4538499116897583},{"id":"https://openalex.org/keywords/visual-space","display_name":"Visual space","score":0.4457104802131653},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.4220965504646301},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4067406952381134},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3802679777145386},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3662288784980774},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33394259214401245},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1026235818862915}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7982629537582397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6919313073158264},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.6130453944206238},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5810927748680115},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4538499116897583},{"id":"https://openalex.org/C207363949","wikidata":"https://www.wikidata.org/wiki/Q462915","display_name":"Visual space","level":3,"score":0.4457104802131653},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.4220965504646301},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4067406952381134},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3802679777145386},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3662288784980774},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33394259214401245},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1026235818862915},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3474085.3475668","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5799999833106995,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1442441212","display_name":null,"funder_award_id":"61672498","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2007972815","https://openalex.org/W2117539524","https://openalex.org/W2145215286","https://openalex.org/W2153579005","https://openalex.org/W2193145675","https://openalex.org/W2194775991","https://openalex.org/W2209594346","https://openalex.org/W2405223529","https://openalex.org/W2518962550","https://openalex.org/W2533598788","https://openalex.org/W2565639579","https://openalex.org/W2570343428","https://openalex.org/W2596142952","https://openalex.org/W2607642691","https://openalex.org/W2613718673","https://openalex.org/W2724492314","https://openalex.org/W2789366140","https://openalex.org/W2963037989","https://openalex.org/W2963149653","https://openalex.org/W2963150697","https://openalex.org/W2963283377","https://openalex.org/W2963351448","https://openalex.org/W2963499153","https://openalex.org/W2963538198","https://openalex.org/W2963545832","https://openalex.org/W2963936013","https://openalex.org/W2963960318","https://openalex.org/W2964086552","https://openalex.org/W2964241181","https://openalex.org/W2964307109","https://openalex.org/W2979571231","https://openalex.org/W2982234480","https://openalex.org/W2982770724","https://openalex.org/W2989604896","https://openalex.org/W2997305434","https://openalex.org/W3012573144","https://openalex.org/W3034199269","https://openalex.org/W3099554308","https://openalex.org/W3106250896","https://openalex.org/W3143107425","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W2381179799","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2366718574","https://openalex.org/W2359774528","https://openalex.org/W4292830139","https://openalex.org/W4319309705"],"abstract_inverted_index":{"On":[0],"account":[1],"of":[2,6,52,58,74,121,172,195],"a":[3,90,135,156,165,173,178],"large":[4],"scale":[5],"dataset":[7],"need":[8],"to":[9,12,33,48,63,67,202],"be":[10,215],"annotated":[11],"train":[13],"the":[14,50,59,68,75,86,114,119,147,151,185,190,207,239],"deep":[15],"learning":[16,168],"based":[17,138],"modern":[18,69,152],"object":[19,23,54,70,141,153,162,231],"detection":[20,24,142,163,232],"model,":[21],"zero-shot":[22,53,140,161,230],"has":[25],"become":[26],"an":[27],"important":[28],"research":[29],"field":[30],"which":[31,144,170],"aims":[32],"simultaneously":[34],"localize":[35],"and":[36,80,92,126,177,192,198,237],"recognize":[37],"unseen":[38,131,204],"objects":[39],"that":[40,105,113,211,222],"are":[41],"not":[42,214],"observed":[43],"during":[44],"training.":[45],"In":[46,101],"order":[47],"improve":[49],"performance":[51,129],"detection,":[55],"recent":[56],"state":[57],"art":[60],"methods":[61,233],"tend":[62],"make":[64],"complicated":[65,99],"modifications":[66],"detectors":[71],"in":[72,123,150],"terms":[73],"model":[76],"structure,":[77],"loss":[78],"function":[79],"training":[81],"process.":[82],"They":[83],"always":[84],"take":[85],"simple":[87,106],"modification":[88,107,116],"as":[89],"baseline,":[91],"think":[93],"it":[94],"is":[95,164],"worse":[96],"than":[97],"more":[98],"methods.":[100],"contrast,":[102],"we":[103,133],"find":[104],"can":[108,226],"achieve":[109],"better":[110],"performance.":[111],"Considering":[112],"redundant":[115],"may":[117],"increase":[118],"risk":[120],"over-fitting":[122],"seen":[124,196],"classes":[125,197],"reduce":[127],"generalization":[128],"on":[130,234],"classes,":[132],"propose":[134],"visual":[136,174,186],"language":[137,179,187,193],"succinct":[139,224],"framework,":[143],"only":[145],"replaces":[146],"classification":[148],"branch":[149],"detector":[154],"with":[155],"lightweight":[157],"visual-language":[158,182],"network.":[159],"Since":[160],"classic":[166],"multi-modal":[167],"protocol":[169],"consists":[171],"feature":[175],"space":[176],"space,":[180],"our":[181,223],"network":[183],"learns":[184],"alignment":[188,201],"from":[189],"image":[191],"data":[194],"transfers":[199],"this":[200],"detect":[203],"objects.":[205],"Following":[206],"Occam's":[208],"razor":[209],"principle":[210],"\"Entities":[212],"should":[213],"multiplied":[216],"unnecessarily\",":[217],"extensive":[218],"experimental":[219],"results":[220],"show":[221],"framework":[225],"suppress":[227],"all":[228],"existing":[229],"several":[235],"benchmarks":[236],"gets":[238],"new":[240],"state-of-the-art.":[241]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
