{"id":"https://openalex.org/W7133188755","doi":"https://doi.org/10.1109/lca.2026.3669169","title":"VLA-TAGE: A Power-Efficient Branch Predictor With Vector-Loop Awareness","display_name":"VLA-TAGE: A Power-Efficient Branch Predictor With Vector-Loop Awareness","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7133188755","doi":"https://doi.org/10.1109/lca.2026.3669169"},"language":null,"primary_location":{"id":"doi:10.1109/lca.2026.3669169","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lca.2026.3669169","pdf_url":null,"source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127783133","display_name":"Chen Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen Zhang","raw_affiliation_strings":["Defense Innovation Institute, Academy of Military Science, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, Academy of Military Science, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127866444","display_name":"Shiqing Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shiqing Zhang","raw_affiliation_strings":["Defense Innovation Institute, Academy of Military Science, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, Academy of Military Science, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049705266","display_name":"Guangda Zhang","orcid":"https://orcid.org/0000-0003-4732-9674"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guangda Zhang","raw_affiliation_strings":["Defense Innovation Institute, Academy of Military Science, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, Academy of Military Science, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100953263","display_name":"Xiaobo Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210092870","display_name":"Jiaxing University","ror":"https://ror.org/00j2a7k55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210092870"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaobo Yan","raw_affiliation_strings":["Nanhu Laboratory, Jiaxing, China"],"affiliations":[{"raw_affiliation_string":"Nanhu Laboratory, Jiaxing, China","institution_ids":["https://openalex.org/I4210092870"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5127825693","display_name":"Xia Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia Zhao","raw_affiliation_strings":["Defense Innovation Institute, Academy of Military Science, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Defense Innovation Institute, Academy of Military Science, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5127783133"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.52583201,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"25","issue":"1","first_page":"105","last_page":"109"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.05350000038743019,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.05350000038743019,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.05009999871253967,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.03709999844431877,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/branch-predictor","display_name":"Branch predictor","score":0.724399983882904},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.7121999859809875},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.6510999798774719},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6211000084877014},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.566100001335144},{"id":"https://openalex.org/keywords/power-consumption","display_name":"Power consumption","score":0.5609999895095825},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.53329998254776},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.4336000084877014},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4268999993801117}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8586000204086304},{"id":"https://openalex.org/C168522837","wikidata":"https://www.wikidata.org/wiki/Q679552","display_name":"Branch predictor","level":2,"score":0.724399983882904},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.7121999859809875},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.6510999798774719},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6211000084877014},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.566100001335144},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.5609999895095825},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.53329998254776},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4494999945163727},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.4336000084877014},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4268999993801117},{"id":"https://openalex.org/C133588205","wikidata":"https://www.wikidata.org/wiki/Q28455645","display_name":"Instruction prefetch","level":3,"score":0.4108000099658966},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3953000009059906},{"id":"https://openalex.org/C107598950","wikidata":"https://www.wikidata.org/wiki/Q259864","display_name":"Microarchitecture","level":2,"score":0.3898000121116638},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.38420000672340393},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3792000114917755},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C134835016","wikidata":"https://www.wikidata.org/wiki/Q690265","display_name":"Lookup table","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.29100000858306885},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.29100000858306885},{"id":"https://openalex.org/C2778787235","wikidata":"https://www.wikidata.org/wiki/Q49007","display_name":"Yarn","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C144543869","wikidata":"https://www.wikidata.org/wiki/Q2738570","display_name":"Mobile computing","level":2,"score":0.28029999136924744},{"id":"https://openalex.org/C118993495","wikidata":"https://www.wikidata.org/wiki/Q5042828","display_name":"Electrical efficiency","level":3,"score":0.27559998631477356},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C194080101","wikidata":"https://www.wikidata.org/wiki/Q46306","display_name":"Access time","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C147297375","wikidata":"https://www.wikidata.org/wiki/Q6674930","display_name":"Look-ahead","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lca.2026.3669169","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lca.2026.3669169","pdf_url":null,"source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data-parallel":[0],"workloads,":[1],"such":[2],"as":[3],"machine":[4],"learning,":[5],"computer":[6],"vision,":[7],"and":[8,31,44],"data":[9],"analytics,":[10],"increasingly":[11],"run":[12],"on":[13,17,100,157],"mobile":[14],"SoCs":[15],"(System":[16],"Chip)":[18],"with":[19,88],"SIMD":[20,120],"(Single":[21],"Instruction,":[22],"Multiple":[23],"Data)":[24],"engines":[25],"to":[26,36,82,115,134],"provide":[27],"real-time":[28],"user":[29],"interactions":[30],"security.":[32],"It":[33],"is":[34],"crucial":[35],"design":[37],"processor":[38],"architectures":[39],"that":[40,55,111],"balance":[41],"high":[42],"performance":[43],"power":[45,79,155],"efficiency.":[46],"In":[47,85],"this":[48,101],"paper,":[49],"we":[50,103],"make":[51],"the":[52,65,138],"key":[53],"observation":[54],"these":[56],"data-parallel":[57,158],"workloads":[58],"exhibit":[59],"regular,":[60],"predictable":[61,119],"control":[62,128],"flow,":[63],"reducing":[64,137],"need":[66],"for":[67,126],"complex":[68],"branch":[69],"prediction":[70,93],"mechanisms.":[71],"However,":[72],"state-of-the-art":[73],"predictors":[74],"like":[75],"TAGE":[76,116],"incur":[77],"substantial":[78],"overhead":[80],"due":[81],"multi-table":[83],"lookups.":[84],"SIMD-intensive":[86],"regions":[87],"long,":[89],"regular":[90],"loops,":[91],"simpler":[92],"schemes":[94],"can":[95],"achieve":[96],"comparable":[97,133],"accuracy.":[98],"Based":[99],"insight,":[102],"propose":[104],"Vector-Loop-Aware":[105],"TAGE(VLA-TAGE),":[106],"using":[107],"a":[108,151],"lightweight":[109],"mechanism":[110],"dynamically":[112],"gates":[113],"access":[114],"tables":[117],"during":[118],"execution":[121],"while":[122,136],"restoring":[123],"full":[124],"functionality":[125],"irregular":[127],"flow.":[129],"VLA-TAGE":[130],"maintains":[131],"accuracy":[132],"TAGE-SC-L":[135],"number":[139],"of":[140,147],"history":[141],"table":[142],"accesses":[143],"by":[144],"an":[145],"average":[146],"63%,":[148],"resulting":[149],"in":[150,154],"51%":[152],"reduction":[153],"consumption":[156],"workloads.":[159]},"counts_by_year":[],"updated_date":"2026-03-28T06:11:35.319607","created_date":"2026-03-03T00:00:00"}
