{"id":"https://openalex.org/W7156028045","doi":"https://doi.org/10.48550/arxiv.2604.22293","title":"HGQ-LUT: Fast LUT-Aware Training and Efficient Architectures for DNN Inference","display_name":"HGQ-LUT: Fast LUT-Aware Training and Efficient Architectures for DNN Inference","publication_year":2026,"publication_date":"2026-04-24","ids":{"openalex":"https://openalex.org/W7156028045","doi":"https://doi.org/10.48550/arxiv.2604.22293"},"language":"en","primary_location":{"id":"pmh:oai:research-information.bris.ac.uk:openaire_cris_publications/400c282f-ff9d-48c4-99ab-8ff55d573aa4","is_oa":true,"landing_page_url":"https://research-information.bris.ac.uk/en/publications/400c282f-ff9d-48c4-99ab-8ff55d573aa4","pdf_url":null,"source":{"id":"https://openalex.org/S7407055359","display_name":"Explore Bristol Research","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sun, C, Que, Z, Zadeh, B, Liu, Q, Alvarez, K H, Luk, W & Spiropulu, M 2026, HGQ-LUT : Fast LUT-Aware Training and Efficient Architectures for DNN Inference. in The 34th IEEE International Symposium On Field-Programmable Custom Computing Machines. IEEE Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM) Proceedings, Institute of Electrical and Electronics Engineers (IEEE).","raw_type":"contributionToPeriodical"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research-information.bris.ac.uk/en/publications/400c282f-ff9d-48c4-99ab-8ff55d573aa4","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134720189","display_name":"Chang Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Chang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134746231","display_name":"Zhiqiang Que","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Que, Zhiqiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134672550","display_name":"Bakhtiar Zadeh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zadeh, Bakhtiar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134695341","display_name":"Qibin Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Qibin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134683977","display_name":"Kevin H. Alvarez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alvarez, Kevin H.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134742620","display_name":"Wayne Luk","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luk, Wayne","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134667595","display_name":"Maria Spiropulu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Spiropulu, Maria","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.33379998803138733,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.33379998803138733,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.08160000294446945,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.04729999974370003,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6222000122070312},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.491100013256073},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.4844000041484833},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.47929999232292175},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4708999991416931},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4047999978065491},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.3384000062942505}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7598000168800354},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6222000122070312},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.491100013256073},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.4844000041484833},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4823000133037567},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.47929999232292175},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4708999991416931},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4562000036239624},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4047999978065491},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.38199999928474426},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.3384000062942505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3361999988555908},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2980000078678131},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2743000090122223},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.27219998836517334},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C2982832238","wikidata":"https://www.wikidata.org/wiki/Q5531640","display_name":"General purpose","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C157170001","wikidata":"https://www.wikidata.org/wiki/Q4781507","display_name":"Applications of artificial intelligence","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C157922185","wikidata":"https://www.wikidata.org/wiki/Q173198","display_name":"Logic synthesis","level":3,"score":0.2603999972343445}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:research-information.bris.ac.uk:openaire_cris_publications/400c282f-ff9d-48c4-99ab-8ff55d573aa4","is_oa":true,"landing_page_url":"https://research-information.bris.ac.uk/en/publications/400c282f-ff9d-48c4-99ab-8ff55d573aa4","pdf_url":null,"source":{"id":"https://openalex.org/S7407055359","display_name":"Explore Bristol Research","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sun, C, Que, Z, Zadeh, B, Liu, Q, Alvarez, K H, Luk, W & Spiropulu, M 2026, HGQ-LUT : Fast LUT-Aware Training and Efficient Architectures for DNN Inference. in The 34th IEEE International Symposium On Field-Programmable Custom Computing Machines. IEEE Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM) Proceedings, Institute of Electrical and Electronics Engineers (IEEE).","raw_type":"contributionToPeriodical"},{"id":"doi:10.48550/arxiv.2604.22293","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22293","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:research-information.bris.ac.uk:openaire_cris_publications/400c282f-ff9d-48c4-99ab-8ff55d573aa4","is_oa":true,"landing_page_url":"https://research-information.bris.ac.uk/en/publications/400c282f-ff9d-48c4-99ab-8ff55d573aa4","pdf_url":null,"source":{"id":"https://openalex.org/S7407055359","display_name":"Explore Bristol Research","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Sun, C, Que, Z, Zadeh, B, Liu, Q, Alvarez, K H, Luk, W & Spiropulu, M 2026, HGQ-LUT : Fast LUT-Aware Training and Efficient Architectures for DNN Inference. in The 34th IEEE International Symposium On Field-Programmable Custom Computing Machines. IEEE Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM) Proceedings, Institute of Electrical and Electronics Engineers (IEEE).","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Lookup-table":[0],"(LUT)":[1],"based":[2],"neural":[3],"networks":[4],"can":[5],"deliver":[6],"ultra-low":[7],"latency":[8],"and":[9,55,89,123,151],"excellent":[10],"hardware":[11,53,74],"efficiency":[12,75],"on":[13,83],"FPGAs":[14],"by":[15,79],"mapping":[16],"arithmetic":[17,162],"operations":[18,99],"directly":[19],"onto":[20],"the":[21,130,176],"logic":[22,107],"primitives.":[23],"However,":[24],"state-of-the-art":[25,73],"LUT-aware":[26,125],"training":[27,78],"(LAT)":[28],"approaches":[29],"remain":[30],"difficult":[31],"to":[32,43],"use":[33],"in":[34,65],"practice:":[35],"they":[36],"are":[37,93,103],"often":[38],"orders":[39],"of":[40,133,154],"magnitude":[41],"slower":[42],"train":[44],"than":[45],"conventional":[46,161],"networks,":[47],"require":[48],"non-trivial":[49],"manual":[50,137],"tuning":[51],"for":[52,109,170],"efficiency,":[54],"lack":[56],"an":[57],"end-to-end":[58],"workflow.":[59],"This":[60],"work":[61],"presents":[62],"HGQ-LUT,":[63],"integrated":[64],"https://github.com/calad0i/HGQ2,":[66],"a":[67,124],"new":[68],"LAT":[69],"approach":[70],"that":[71,92,157],"achieves":[72],"while":[76],"accelerating":[77],"over":[80],"100":[81],"times":[82],"modern":[84],"GPUs.":[85],"HGQ-LUT":[86,128,143],"introduces":[87],"LUT-Dense":[88],"LUT-Conv":[90],"layers":[91,114],"implemented":[94],"with":[95,115,160],"regular,":[96],"accelerator-efficient":[97],"tensor":[98],"during":[100],"training,":[101],"which":[102],"then":[104],"compiled":[105],"into":[106,144],"LUTs":[108],"hardware.":[110],"By":[111],"combining":[112],"these":[113],"fine-grained,":[116],"element-wise":[117],"heterogeneous":[118],"quantization":[119],"(including":[120],"zero-bit":[121],"pruning)":[122],"resource":[126],"surrogate,":[127],"enables":[129],"automatic":[131],"exploration":[132],"accuracy-resource":[134],"trade-offs":[135],"without":[136],"bit-width":[138],"tuning.":[139],"We":[140],"further":[141],"integrate":[142],"open-source":[145],"toolchains,":[146],"enabling":[147],"unified":[148],"design,":[149],"compilation,":[150],"bit-exact":[152],"verification":[153],"hybrid":[155],"architectures":[156],"mix":[158],"LUT-based":[159],"blocks.":[163],"These":[164],"features":[165],"make":[166],"LAT-based":[167],"DNNs":[168],"practical":[169],"real-world":[171],"deployment,":[172],"such":[173],"as":[174],"at":[175],"CERN":[177],"Large":[178],"Hadron":[179],"Collider's":[180],"experiments.":[181]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-28T00:00:00"}
