{"id":"https://openalex.org/W4392873778","doi":"https://doi.org/10.1109/iccv51701.2025.02130","title":"Griffon v2: Advancing Multimodal Perception with High-Resolution Scaling and Visual-Language Co-Referring","display_name":"Griffon v2: Advancing Multimodal Perception with High-Resolution Scaling and Visual-Language Co-Referring","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4392873778","doi":"https://doi.org/10.1109/iccv51701.2025.02130"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.02130","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02130","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.09333","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040626313","display_name":"Yufei Zhan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yufei Zhan","raw_affiliation_strings":["School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Shurong Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shurong Zheng","raw_affiliation_strings":["Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I4210112150"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065043476","display_name":"Yousong Zhu","orcid":"https://orcid.org/0000-0001-8544-410X"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yousong Zhu","raw_affiliation_strings":["Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I4210112150"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102556984","display_name":"Hongyin Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongyin Zhao","raw_affiliation_strings":["Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I4210112150"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006806882","display_name":"Fan Yang","orcid":"https://orcid.org/0000-0001-6861-9596"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Yang","raw_affiliation_strings":["Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I4210112150"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110938150","display_name":"Ming Tang","orcid":"https://orcid.org/0000-0003-4976-3095"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Tang","raw_affiliation_strings":["Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I4210112150"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058420913","display_name":"Jinqiao Wang","orcid":"https://orcid.org/0000-0002-9118-2780"},"institutions":[{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinqiao Wang","raw_affiliation_strings":["Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Foundation Model Research Center, Institute of Automation, Chinese Academy of Sciences,Beijing,China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I4210112150"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5040626313"],"corresponding_institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":2.0398,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86265385,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"22947","last_page":"22957"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9358999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.6671239137649536},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6598060131072998},{"id":"https://openalex.org/keywords/resolution","display_name":"Resolution (logic)","score":0.5063227415084839},{"id":"https://openalex.org/keywords/visual-language","display_name":"Visual language","score":0.46790748834609985},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.4091470241546631},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.37973952293395996},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.34175732731819153},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.3326839804649353},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.32858771085739136},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3262813091278076},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2811945080757141},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1915474534034729},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.11771339178085327},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.07996618747711182},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.05714493989944458}],"concepts":[{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.6671239137649536},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6598060131072998},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.5063227415084839},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.46790748834609985},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.4091470241546631},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.37973952293395996},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.34175732731819153},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3326839804649353},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.32858771085739136},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3262813091278076},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2811945080757141},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1915474534034729},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.11771339178085327},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.07996618747711182},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.05714493989944458}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.02130","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.02130","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2403.09333","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.09333","pdf_url":"https://arxiv.org/pdf/2403.09333","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2403.09333","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2403.09333","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2403.09333","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.09333","pdf_url":"https://arxiv.org/pdf/2403.09333","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2899196128","display_name":null,"funder_award_id":"62276260","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4069856734","display_name":null,"funder_award_id":"62472423","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7931108516","display_name":null,"funder_award_id":"62276260,62472423","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4392873778.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W141820298","https://openalex.org/W4378770497","https://openalex.org/W2049584446","https://openalex.org/W2079781215","https://openalex.org/W2064404759","https://openalex.org/W4308245303","https://openalex.org/W2014033564","https://openalex.org/W2910573937","https://openalex.org/W4385571583","https://openalex.org/W4380682236"],"abstract_inverted_index":{"Large":[0,96],"Vision":[1],"Language":[2,97],"Models":[3],"have":[4],"achieved":[5],"fine-grained":[6],"object":[7,68,177,179],"perception,":[8],"but":[9],"the":[10,21,35,91,103,125],"limitation":[11,32],"of":[12,23,157],"image":[13,79],"resolution":[14],"remains":[15],"a":[16,59,83,132],"significant":[17],"obstacle":[18],"to":[19,38,89],"surpassing":[20],"performance":[22,166],"task-specific":[24],"experts":[25],"in":[26,45,95,176],"complex":[27],"and":[28,42,72,85,106,109,146,161,169,172,181,184],"dense":[29],"scenarios.":[30],"Such":[31],"further":[33,123],"restricts":[34],"model's":[36],"potential":[37],"achieve":[39,164],"nuanced":[40],"visual":[41,71,134,160],"language":[43],"referring":[44,69],"domains":[46],"such":[47],"as":[48],"GUI":[49],"Agents,":[50],"counting,":[51,180],"\\textit{etc}.":[52],"To":[53,75],"address":[54],"this":[55],"issue,":[56],"we":[57,81,122],"introduce":[58],"unified":[60],"high-resolution":[61],"generalist":[62],"model,":[63],"Griffon":[64,152],"v2,":[65],"enabling":[66],"flexible":[67,141],"with":[70,127,140,159],"textual":[73,162],"prompts.":[74],"efficiently":[76],"scale":[77],"up":[78],"resolution,":[80],"design":[82,100],"simple":[84],"lightweight":[86],"down-sampling":[87],"projector":[88],"overcome":[90],"input":[92],"tokens":[93],"constraint":[94],"Models.":[98],"This":[99],"inherently":[101],"preserves":[102],"complete":[104],"contexts":[105],"fine":[107],"details":[108],"significantly":[110],"improves":[111],"multimodal":[112],"perception":[113],"ability,":[114],"especially":[115],"for":[116],"small":[117],"objects.":[118],"Building":[119],"upon":[120],"this,":[121],"equip":[124],"model":[126],"visual-language":[128],"co-referring":[129],"capabilities":[130],"through":[131],"plug-and-play":[133],"tokenizer.":[135],"It":[136],"enables":[137],"user-friendly":[138],"interaction":[139],"target":[142],"images,":[143],"free-form":[144],"texts,":[145],"even":[147],"coordinates.":[148],"Experiments":[149],"demonstrate":[150],"that":[151],"v2":[153],"can":[154],"localize":[155],"objects":[156],"interest":[158],"referring,":[163],"state-of-the-art":[165],"on":[167],"REC":[168],"phrase":[170],"grounding,":[171],"outperform":[173],"expert":[174],"models":[175],"detection,":[178],"REG.":[182],"Data":[183],"codes":[185],"are":[186],"released":[187],"at":[188],"https://github.com/jefferyZhan/Griffon.":[189]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-06T06:03:25.996018","created_date":"2025-10-10T00:00:00"}
