{"id":"https://openalex.org/W7133532429","doi":"https://doi.org/10.1109/hpca68181.2026.11408525","title":"Focus: A Streaming Concentration Architecture for Efficient Vision-Language Models","display_name":"Focus: A Streaming Concentration Architecture for Efficient Vision-Language Models","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7133532429","doi":"https://doi.org/10.1109/hpca68181.2026.11408525"},"language":null,"primary_location":{"id":"doi:10.1109/hpca68181.2026.11408525","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408525","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060545776","display_name":"Chiyue Wei","orcid":"https://orcid.org/0009-0008-8815-7948"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chiyue Wei","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Cong Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cong Guo","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128092508","display_name":"Junyao Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junyao Zhang","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100585144","display_name":"Haoxuan Shan","orcid":"https://orcid.org/0009-0000-9671-6713"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoxuan Shan","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048554824","display_name":"Yifan Xu","orcid":"https://orcid.org/0009-0001-5455-985X"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yifan Xu","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100600502","display_name":"Ziyue Zhang","orcid":"https://orcid.org/0000-0002-8219-8510"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ziyue Zhang","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128097151","display_name":"Yudong Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yudong Liu","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128127245","display_name":"Qinsi Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qinsi Wang","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088778898","display_name":"Changchun Zhou","orcid":"https://orcid.org/0009-0005-3968-5048"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Changchun Zhou","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123713900","display_name":"Hai Helen Li","orcid":null},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hai Helen Li","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103170781","display_name":"Yuansi Chen","orcid":"https://orcid.org/0000-0002-8899-7380"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiran Chen","raw_affiliation_strings":["Duke University"],"affiliations":[{"raw_affiliation_string":"Duke University","institution_ids":["https://openalex.org/I170897317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5060545776"],"corresponding_institution_ids":["https://openalex.org/I170897317"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.46086387,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"18"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8004999756813049,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8004999756813049,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.01850000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.016499999910593033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.38989999890327454},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.27630001306533813},{"id":"https://openalex.org/keywords/systems-architecture","display_name":"Systems architecture","score":0.27469998598098755},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.24650000035762787}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6057999730110168},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.39489999413490295},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.38989999890327454},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.2815999984741211},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C98025372","wikidata":"https://www.wikidata.org/wiki/Q477538","display_name":"Systems architecture","level":3,"score":0.27469998598098755},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.26269999146461487},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.24650000035762787},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.23929999768733978},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.23899999260902405}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca68181.2026.11408525","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408525","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W2067523571","https://openalex.org/W2140199336","https://openalex.org/W2442974303","https://openalex.org/W2560730294","https://openalex.org/W2606722458","https://openalex.org/W2979826702","https://openalex.org/W3006586535","https://openalex.org/W3130554079","https://openalex.org/W3132616766","https://openalex.org/W3159727696","https://openalex.org/W4308083739","https://openalex.org/W4360831786","https://openalex.org/W4360831795","https://openalex.org/W4366341968","https://openalex.org/W4385245566","https://openalex.org/W4386066385","https://openalex.org/W4386071707","https://openalex.org/W4394998519","https://openalex.org/W4402727142","https://openalex.org/W4402727885","https://openalex.org/W4403081466","https://openalex.org/W4403939369","https://openalex.org/W4404955778","https://openalex.org/W4409248468","https://openalex.org/W4409248600","https://openalex.org/W4409248601","https://openalex.org/W4409248709","https://openalex.org/W4409261973","https://openalex.org/W4411232024","https://openalex.org/W4411486323","https://openalex.org/W4411486490","https://openalex.org/W4411486557","https://openalex.org/W4412887841","https://openalex.org/W4413144845","https://openalex.org/W4413146669","https://openalex.org/W4413157571"],"related_works":[],"abstract_inverted_index":{"Vision-Language":[0],"Models":[1],"(VLMs)":[2],"have":[3],"demonstrated":[4],"strong":[5],"performance":[6,186],"on":[7,36,107],"tasks":[8],"such":[9],"as":[10,157],"video":[11],"captioning":[12],"and":[13,21,28,58,117,145,172,187],"visual":[14],"question":[15],"answering.":[16],"However,":[17],"their":[18],"growing":[19],"scale":[20],"video-level":[22],"inputs":[23,98],"lead":[24],"to":[25,43,64,134,148],"significant":[26],"computational":[27],"memory":[29],"overhead,":[30],"posing":[31],"challenges":[32],"for":[33],"real-time":[34],"deployment":[35],"hardware":[37],"accelerators.":[38],"While":[39],"prior":[40],"work":[41],"attempts":[42],"reduce":[44],"redundancy":[45,86,120],"via":[46,122],"token":[47,104],"pruning":[48,105],"or":[49],"merging,":[50],"these":[51],"methods":[52],"typically":[53],"operate":[54],"at":[55,99,196],"coarse":[56],"granularity":[57],"incur":[59],"high":[60,154],"runtime":[61],"overhead":[62],"due":[63],"global":[65],"token-level":[66],"operations.":[67],"In":[68],"this":[69],"study,":[70],"we":[71],"propose":[72],"Focus,":[73],"a":[74,90,158,162],"Streaming":[75],"Concentration":[76],"Architecture":[77],"that":[78,94],"efficiently":[79],"accelerates":[80],"VLM":[81],"inference":[82],"through":[83],"progressive,":[84],"fine-grained":[85],"elimination.":[87],"Focus":[88,139,165,193],"introduces":[89],"multilevel":[91],"concentration":[92,113,126],"paradigm":[93],"hierarchically":[95],"compresses":[96],"vision-language":[97],"three":[100],"levels:":[101],"(1)":[102],"semantic-guided":[103],"based":[106],"textual":[108],"prompts,":[109],"(2)":[110],"spatial-temporal":[111],"blocklevel":[112],"using":[114],"localized":[115],"comparisons,":[116],"(3)":[118],"vectorlevel":[119],"removal":[121],"motion-aware":[123],"matching.":[124],"All":[125],"steps":[127],"are":[128],"tightly":[129],"co-designed":[130],"with":[131],"the":[132],"architecture":[133],"support":[135],"streaming-friendly,":[136],"on-chip":[137],"execution.":[138],"leverages":[140],"GEMM":[141],"tiling,":[142],"convolution-style":[143],"layout,":[144],"cross-modal":[146],"attention":[147],"minimize":[149],"off-chip":[150],"access":[151],"while":[152],"enabling":[153],"throughput.":[155],"Implemented":[156],"modular":[159],"unit":[160],"within":[161],"systolic-array":[163],"accelerator,":[164],"achieves":[166],"<tex":[167,173],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[168,174],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$2.4":[169],"\\times$</tex>":[170,176],"speedup":[171],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$3.3":[175],"reduction":[177],"in":[178,184],"energy,":[179],"significantly":[180],"outperforming":[181],"state-of-the-art":[182],"accelerator":[183],"both":[185],"energy":[188],"efficiency.":[189],"Full-stack":[190],"implementation":[191],"of":[192],"is":[194],"open-sourced":[195],"https://github.com/dubcyfor3/Focus.":[197]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-03-05T00:00:00"}
