{"id":"https://openalex.org/W4393578766","doi":"https://doi.org/10.1145/3626202.3637567","title":"A Statically and Dynamically Scalable Soft GPGPU","display_name":"A Statically and Dynamically Scalable Soft GPGPU","publication_year":2024,"publication_date":"2024-04-01","ids":{"openalex":"https://openalex.org/W4393578766","doi":"https://doi.org/10.1145/3626202.3637567"},"language":"en","primary_location":{"id":"doi:10.1145/3626202.3637567","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626202.3637567","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626202.3637567","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3626202.3637567","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082265695","display_name":"Martin Langhammer","orcid":"https://orcid.org/0000-0001-8206-2077"},"institutions":[{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]},{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Martin Langhammer","raw_affiliation_strings":["Intel Corporation &amp; Imperial College London, Swindon, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0001-8206-2077","affiliations":[{"raw_affiliation_string":"Intel Corporation &amp; Imperial College London, Swindon, United Kingdom","institution_ids":["https://openalex.org/I4210158342","https://openalex.org/I47508984"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029829952","display_name":"George A. Constantinides","orcid":"https://orcid.org/0000-0002-0201-310X"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"George A. Constantinides","raw_affiliation_strings":["Imperial College London, London, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0002-0201-310X","affiliations":[{"raw_affiliation_string":"Imperial College London, London, United Kingdom","institution_ids":["https://openalex.org/I47508984"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.2149,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.87767014,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"165","last_page":"175"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8327542543411255},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6288589239120483},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.6204100847244263},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6175587773323059},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.616611659526825},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.5902504324913025},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.5596233606338501},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5172948837280273},{"id":"https://openalex.org/keywords/digital-signal-processing","display_name":"Digital signal processing","score":0.5163927674293518},{"id":"https://openalex.org/keywords/microarchitecture","display_name":"Microarchitecture","score":0.5113822817802429},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.4956260919570923},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.49365195631980896},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.38987407088279724},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.24619746208190918},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.1253776252269745},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.09411734342575073}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8327542543411255},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6288589239120483},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.6204100847244263},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6175587773323059},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.616611659526825},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.5902504324913025},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.5596233606338501},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5172948837280273},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.5163927674293518},{"id":"https://openalex.org/C107598950","wikidata":"https://www.wikidata.org/wiki/Q259864","display_name":"Microarchitecture","level":2,"score":0.5113822817802429},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.4956260919570923},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.49365195631980896},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.38987407088279724},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.24619746208190918},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.1253776252269745},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.09411734342575073},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3626202.3637567","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626202.3637567","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626202.3637567","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3626202.3637567","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3626202.3637567","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3626202.3637567","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM/SIGDA International Symposium on Field Programmable Gate Arrays","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4393578766.pdf"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W799594534","https://openalex.org/W1970047515","https://openalex.org/W1987163471","https://openalex.org/W2025981567","https://openalex.org/W2048166678","https://openalex.org/W2063488795","https://openalex.org/W2092546074","https://openalex.org/W2133712953","https://openalex.org/W2141389982","https://openalex.org/W2155729354","https://openalex.org/W2296385998","https://openalex.org/W2766677151","https://openalex.org/W2892053304","https://openalex.org/W2898348861","https://openalex.org/W3008619650","https://openalex.org/W4241765343","https://openalex.org/W4249459551","https://openalex.org/W4388214717"],"related_works":["https://openalex.org/W2370279919","https://openalex.org/W2091793939","https://openalex.org/W2152301037","https://openalex.org/W2055881261","https://openalex.org/W2094210932","https://openalex.org/W4250432526","https://openalex.org/W1836109266","https://openalex.org/W2384717171","https://openalex.org/W2027246841","https://openalex.org/W2101536355"],"abstract_inverted_index":{"Current":[0],"soft":[1,34,68,82,239],"processor":[2,84,212],"architectures":[3],"for":[4,55,203],"FPGAs":[5,16],"do":[6],"not":[7],"utilize":[8],"the":[9,12,43,67,72,102,106,111,137,160,196,211,216,220],"potential":[10],"of":[11,21,105,136,163,182,219,231],"massive":[13],"parallelism":[14],"available.":[15,76],"now":[17],"support":[18],"many":[19],"thousands":[20],"embedded":[22,108],"floating":[23],"point":[24],"operators,":[25],"and":[26,47,88,175,234],"have":[27,39],"similar":[28],"computational":[29],"densities":[30],"to":[31,165,171,177,215,236],"GPGPUs.":[32],"Several":[33],"GPGPU":[35,83],"or":[36,114],"SIMT":[37],"processors":[38],"been":[40],"published,":[41],"but":[42],"reported":[44],"large":[45],"areas":[46],"modest":[48],"Fmax":[49],"makes":[50],"their":[51],"widespread":[52],"use":[53],"unlikely":[54],"commercial":[56,238],"designs.":[57],"In":[58],"this":[59,206],"paper":[60],"we":[61,152],"take":[62],"an":[63,144],"alternative":[64],"approach,":[65],"building":[66],"GPU":[69],"microarchitecture":[70],"around":[71],"FPGA":[73,112,221],"resource":[74],"mix":[75],"We":[77,128,199,223],"demonstrate":[78],"a":[79,118,123,134,154,190,229,237],"statically":[80],"scalable":[81],"(where":[85],"both":[86],"parameters":[87],"feature":[89],"set":[90],"can":[91,140],"be":[92,141],"determined":[93],"at":[94,101,187],"configuration":[95,161],"time)":[96],"that":[97],"always":[98],"closes":[99],"timing":[100,186],"peak":[103],"speed":[104],"slowest":[107],"component":[109],"in":[110],"(DSP":[113],"hard":[115],"memory),":[116],"with":[117,169],"completely":[119],"unconstrained":[120],"compile":[121],"into":[122],"current":[124],"Intel":[125],"Agilex":[126],"FPGA.":[127],"also":[129,224],"show":[130,153],"dynamic":[131],"scalability,":[132],"where":[133],"subset":[135],"thread":[138],"space":[139],"specified":[142],"on":[143,159],"instruction-by-instruction":[145],"basis.":[146],"For":[147],"one":[148],"example":[149],"core":[150],"type,":[151],"logic":[155],"range":[156,230],"--":[157,162],"depending":[158],"4k":[164],"10k":[166],"ALMs,":[167],"along":[168],"24":[170],"32":[172],"DSP":[173,197],"Blocks,":[174],"50":[176],"250":[178],"M20K":[179],"memories.":[180],"All":[181],"these":[183],"instances":[184],"close":[185],"771":[188],"MHz,":[189],"performance":[191],"level":[192],"limited":[193],"only":[194],"by":[195,209],"Blocks.":[198],"describe":[200],"our":[201],"methodology":[202],"reliably":[204],"achieving":[205],"clock":[207],"rate":[208],"matching":[210],"pipeline":[213],"structure":[214,218],"physical":[217],"fabric.":[222],"benchmark":[225],"several":[226],"algorithms":[227],"across":[228],"data":[232],"sizes,":[233],"compare":[235],"RISC":[240],"processor.":[241]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
