{"id":"https://openalex.org/W7140886584","doi":"https://doi.org/10.1109/fpl68686.2025.00044","title":"Refining Datapath for Microscaling ViTs","display_name":"Refining Datapath for Microscaling ViTs","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W7140886584","doi":"https://doi.org/10.1109/fpl68686.2025.00044"},"language":null,"primary_location":{"id":"doi:10.1109/fpl68686.2025.00044","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fpl68686.2025.00044","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 35th International Conference on Field-Programmable Logic and Applications (FPL)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124899439","display_name":"Can Xiao","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Can Xiao","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060302344","display_name":"Jianyi Cheng","orcid":"https://orcid.org/0000-0003-2791-2555"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jianyi Cheng","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5130650694","display_name":"Yiren Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yiren Zhao","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5124899439"],"corresponding_institution_ids":["https://openalex.org/I47508984"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.80830638,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"263","last_page":"272"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.08720000088214874,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.08720000088214874,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11522","display_name":"VLSI and FPGA Design Techniques","score":0.05829999968409538,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12452","display_name":"Electrowetting and Microfluidic Technologies","score":0.04270000010728836,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/datapath","display_name":"Datapath","score":0.8938999772071838},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7462999820709229},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6642000079154968},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5584999918937683},{"id":"https://openalex.org/keywords/high-level-synthesis","display_name":"High-level synthesis","score":0.5519999861717224},{"id":"https://openalex.org/keywords/hardware-architecture","display_name":"Hardware architecture","score":0.4690999984741211},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.37860000133514404},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.37450000643730164},{"id":"https://openalex.org/keywords/hardware-compatibility-list","display_name":"Hardware compatibility list","score":0.34060001373291016}],"concepts":[{"id":"https://openalex.org/C2781198647","wikidata":"https://www.wikidata.org/wiki/Q1633673","display_name":"Datapath","level":2,"score":0.8938999772071838},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7540000081062317},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7462999820709229},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6642000079154968},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5584999918937683},{"id":"https://openalex.org/C58013763","wikidata":"https://www.wikidata.org/wiki/Q5754574","display_name":"High-level synthesis","level":3,"score":0.5519999861717224},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.48410001397132874},{"id":"https://openalex.org/C65232700","wikidata":"https://www.wikidata.org/wiki/Q5656403","display_name":"Hardware architecture","level":3,"score":0.4690999984741211},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4625000059604645},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.37860000133514404},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.37450000643730164},{"id":"https://openalex.org/C161394538","wikidata":"https://www.wikidata.org/wiki/Q3127397","display_name":"Hardware compatibility list","level":4,"score":0.34060001373291016},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.33709999918937683},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.33469998836517334},{"id":"https://openalex.org/C2908650547","wikidata":"https://www.wikidata.org/wiki/Q20999234","display_name":"Intrinsics","level":2,"score":0.31220000982284546},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3093999922275543},{"id":"https://openalex.org/C157922185","wikidata":"https://www.wikidata.org/wiki/Q173198","display_name":"Logic synthesis","level":3,"score":0.30309998989105225},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.28600001335144043},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.28459998965263367},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.2678000032901764},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.26499998569488525},{"id":"https://openalex.org/C36941000","wikidata":"https://www.wikidata.org/wiki/Q209455","display_name":"VHDL","level":3,"score":0.26350000500679016},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.2623000144958496},{"id":"https://openalex.org/C70970002","wikidata":"https://www.wikidata.org/wiki/Q189434","display_name":"Multiplexer","level":3,"score":0.25279998779296875},{"id":"https://openalex.org/C142962650","wikidata":"https://www.wikidata.org/wiki/Q240838","display_name":"Reconfigurable computing","level":3,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fpl68686.2025.00044","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fpl68686.2025.00044","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 35th International Conference on Field-Programmable Logic and Applications (FPL)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2513568085","https://openalex.org/W2565125333","https://openalex.org/W2884150179","https://openalex.org/W3017024317","https://openalex.org/W3043504674","https://openalex.org/W3100985894","https://openalex.org/W3206453033","https://openalex.org/W3206774775","https://openalex.org/W3207622241","https://openalex.org/W4281660701","https://openalex.org/W4308083513","https://openalex.org/W4308083526","https://openalex.org/W4308083841","https://openalex.org/W4312453657","https://openalex.org/W4318541578","https://openalex.org/W4321637298","https://openalex.org/W4360831786","https://openalex.org/W4380874652","https://openalex.org/W4380881077","https://openalex.org/W4383749366","https://openalex.org/W4386763903","https://openalex.org/W4387789586","https://openalex.org/W4389519100","https://openalex.org/W4390097935","https://openalex.org/W4390873361","https://openalex.org/W4393406920","https://openalex.org/W4393578753","https://openalex.org/W4393949386","https://openalex.org/W4394871707","https://openalex.org/W4403278875","https://openalex.org/W4403278953","https://openalex.org/W4404238186","https://openalex.org/W7133228638","https://openalex.org/W7133231030"],"related_works":[],"abstract_inverted_index":{"Vision":[0],"Transformers":[1],"(ViTs)":[2],"leverage":[3],"the":[4,28,43,64,69,79,88,92,108,118,163],"transformer":[5],"architecture":[6],"to":[7,38,87,145,204,215],"effectively":[8],"capture":[9],"global":[10],"context,":[11],"demonstrating":[12],"strong":[13],"performance":[14,85],"in":[15,22,63,83],"computer":[16],"vision":[17],"tasks.":[18],"A":[19],"major":[20],"challenge":[21],"ViT":[23,111,119],"hardware":[24,54,72,149,152,177],"acceleration":[25],"is":[26],"that":[27,35,113,179],"model":[29,39,70],"family":[30],"contains":[31],"complex":[32,76,182],"arithmetic":[33,127,183],"operations":[34,67,77,116,184],"are":[36,156],"sensitive":[37],"accuracy,":[40,148],"such":[41],"as":[42],"Softmax":[44],"and":[45,73,94,136,151,171,206],"LayerNorm":[46],"operations,":[47],"which":[48],"cannot":[49],"be":[50,143],"mapped":[51],"onto":[52,121],"efficient":[53],"with":[55],"low":[56],"precision.":[57],"Existing":[58],"methods":[59],"only":[60],"exploit":[61,124],"parallelism":[62],"matrix":[65],"multiplication":[66],"of":[68,117],"on":[71,78],"keep":[74],"these":[75,181],"CPU.":[80],"This":[81],"results":[82],"suboptimal":[84],"due":[86],"communication":[89],"overhead":[90],"between":[91],"CPU":[93],"accelerator.":[95],"Can":[96],"new":[97,126],"data":[98],"formats":[99],"solve":[100],"this":[101,104],"problem?":[102],"In":[103],"work,":[105],"we":[106,159,174],"present":[107],"first":[109],"open-source":[110],"accelerator":[112],"maps":[114],"all":[115],"models":[120],"FPGAs.":[122],"We":[123],"a":[125],"format":[128],"named":[129],"Microscaling":[130],"Integer":[131],"(MXInt)":[132],"for":[133],"datapath":[134],"designs":[135],"evaluate":[137],"how":[138],"different":[139],"design":[140],"choices":[141],"can":[142],"made":[144],"trade":[146],"off":[147],"performance,":[150],"utilization.":[153],"Our":[154],"contributions":[155],"twofold.":[157],"First,":[158],"quantize":[160],"ViTs":[161],"using":[162],"MXInt":[164],"format,":[165],"achieving":[166],"both":[167],"high":[168],"area":[169],"efficiency":[170],"accuracy.":[172],"Second,":[173],"propose":[175],"MXInt-specific":[176],"optimization":[178],"map":[180],"into":[185],"custom":[186],"hardware.":[187],"Within":[188],"1":[189],"%":[190],"accuracy":[191],"loss,":[192],"our":[193],"method":[194],"achieves":[195],"at":[196,207],"least":[197,208],"<tex":[198,209],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[199,210],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$93":[200],"\\times$</tex>":[201,212],"speedup":[202,213],"compared":[203,214],"Float16":[205],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.9":[211],"related":[216],"work.":[217]},"counts_by_year":[],"updated_date":"2026-03-28T06:11:35.319607","created_date":"2026-03-27T00:00:00"}
