{"id":"https://openalex.org/W4413144805","doi":"https://doi.org/10.1109/cvpr52734.2025.01841","title":"FastVLM: Efficient Vision Encoding for Vision Language Models","display_name":"FastVLM: Efficient Vision Encoding for Vision Language Models","publication_year":2025,"publication_date":"2025-06-10","ids":{"openalex":"https://openalex.org/W4413144805","doi":"https://doi.org/10.1109/cvpr52734.2025.01841"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52734.2025.01841","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.01841","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013120724","display_name":"Pavan Kumar Anasosalu Vasu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Pavan Kumar Anasosalu Vasu","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036601505","display_name":"Fartash Faghri","orcid":"https://orcid.org/0000-0001-5975-5158"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Fartash Faghri","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102972645","display_name":"Chunliang Li","orcid":"https://orcid.org/0000-0002-5938-5510"},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chun-Liang Li","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104313177","display_name":"Cem Koc","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Cem Koc","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115691056","display_name":"Nate True","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Nate True","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026835553","display_name":"Albert Antony","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Albert Antony","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083007242","display_name":"G. Santhanam","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Gokul Santhanam","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017283719","display_name":"James Gabriel","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"James Gabriel","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094170434","display_name":"Peter Grasch","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Peter Grasch","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028613002","display_name":"Oncel Tuzel","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Oncel Tuzel","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059295598","display_name":"Hadi Pouransari","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107260","display_name":"Apple (United Kingdom)","ror":"https://ror.org/01vpeym60","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210107260"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Hadi Pouransari","raw_affiliation_strings":["Apple"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Apple","institution_ids":["https://openalex.org/I4210107260"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5013120724"],"corresponding_institution_ids":["https://openalex.org/I4210107260"],"apc_list":null,"apc_paid":null,"fwci":14.629,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.99121006,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"19769","last_page":"19780"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7484219670295715},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.7297672033309937},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5175777673721313},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5135071873664856},{"id":"https://openalex.org/keywords/machine-vision","display_name":"Machine vision","score":0.43174657225608826}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7484219670295715},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.7297672033309937},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5175777673721313},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5135071873664856},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.43174657225608826}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52734.2025.01841","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.01841","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Scaling":[0],"the":[1,9,36,49,66,73,86,138,151,155,163,167,192,208],"input":[2,152],"image":[3,19,89,146],"resolution":[4,147,194],"is":[5,222],"essential":[6],"for":[7,130,157],"enhancing":[8],"performance":[10,180,199],"of":[11,39,52,68,85],"Vision":[12],"Language":[13],"Models":[14],"(VLMs),":[15],"particularly":[16],"in":[17,174],"text-rich":[18],"understanding":[20],"tasks.":[21],"However,":[22],"popular":[23],"visual":[24,69,142],"encoders":[25],"such":[26],"as":[27],"ViTs":[28],"become":[29],"inefficient":[30],"at":[31,191],"high":[32,42],"resolutions":[33],"due":[34],"to":[35,72,121,185,189],"large":[37],"number":[38,67],"tokens":[40,70,124],"and":[41,64,95,110,125,145,161,205,217,226],"encoding":[43,62,128],"latency.":[44,78],"At":[45],"different":[46],"operational":[47],"resolutions,":[48],"vision":[50,91,118,219],"encoder":[51,119,220],"a":[53,81,115,218],"VLM":[54,182],"can":[55],"be":[56],"optimized":[57,105],"along":[58],"two":[59],"axes:":[60],"reducing":[61],"latency":[63],"minimizing":[65],"passed":[71],"LLM,":[74,211],"thereby":[75],"lowering":[76],"overall":[77],"Based":[79],"on":[80,181,200],"comprehensive":[82],"efficiency":[83],"analysis":[84],"interplay":[87],"between":[88,107,141],"resolution,":[90,108],"latency,":[92,109],"token":[93,143,159],"count,":[94],"LLM":[96],"size,":[97],"we":[98],"introduce":[99],"FastVLM\u2014a":[100],"model":[101,164],"that":[102],"achieves":[103,137,171,197],"an":[104],"tradeoff":[106],"accuracy.":[111],"FastVLM":[112,136,170,196],"incorporates":[113],"FastViTHD,":[114],"novel":[116],"hybrid":[117],"designed":[120],"output":[122],"fewer":[123],"significantly":[126],"reduce":[127],"time":[129],"high-resolution":[131],"images.":[132],"Unlike":[133],"previous":[134],"methods,":[135],"optimal":[139],"balance":[140],"count":[144],"solely":[148],"by":[149],"scaling":[150],"image,":[153],"eliminating":[154],"need":[156],"additional":[158],"pruning":[160],"simplifying":[162],"design.":[165],"In":[166],"LLaVA1.5":[168],"setup,":[169],"3.2\u00d7":[172],"improvement":[173],"time-to-first-token":[175],"(TTFT)":[176],"while":[177],"maintaining":[178],"similar":[179],"benchmarks":[183,202],"compared":[184],"prior":[186],"works.":[187],"Compared":[188],"LLaVa-OneVision":[190],"highest":[193],"(1152\u00d71152),":[195],"comparable":[198],"key":[201],"like":[203],"SeedBench":[204],"MMMU,":[206],"using":[207],"same":[209],"0.5B":[210],"but":[212],"with":[213],"85\u00d7":[214],"faster":[215],"TTFT":[216],"thathttps:":[221],"3.4\u00d7":[223],"smaller.":[224],"Code":[225],"models":[227],"are":[228],"available":[229],"at.":[230],"//github.com/apple/ml-fastvlm":[231]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":6}],"updated_date":"2026-05-14T08:36:36.166977","created_date":"2025-10-10T00:00:00"}
