{"id":"https://openalex.org/W2996795721","doi":"https://doi.org/10.1145/3173548","title":"General-Purpose Computing with Soft GPUs on FPGAs","display_name":"General-Purpose Computing with Soft GPUs on FPGAs","publication_year":2018,"publication_date":"2018-01-24","ids":{"openalex":"https://openalex.org/W2996795721","doi":"https://doi.org/10.1145/3173548","mag":"2996795721"},"language":"en","primary_location":{"id":"doi:10.1145/3173548","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3173548","pdf_url":null,"source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003242515","display_name":"Muhammed Al Kadi","orcid":"https://orcid.org/0000-0003-0365-730X"},"institutions":[{"id":"https://openalex.org/I904495901","display_name":"Ruhr University Bochum","ror":"https://ror.org/04tsk2644","country_code":"DE","type":"education","lineage":["https://openalex.org/I904495901"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Muhammed Al Kadi","raw_affiliation_strings":["Ruhr University of Bochum, Germany"],"raw_orcid":"https://orcid.org/0000-0003-0365-730X","affiliations":[{"raw_affiliation_string":"Ruhr University of Bochum, Germany","institution_ids":["https://openalex.org/I904495901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014124149","display_name":"Benedikt Jan\u00dfen","orcid":"https://orcid.org/0000-0002-4512-9268"},"institutions":[{"id":"https://openalex.org/I904495901","display_name":"Ruhr University Bochum","ror":"https://ror.org/04tsk2644","country_code":"DE","type":"education","lineage":["https://openalex.org/I904495901"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Benedikt Janssen","raw_affiliation_strings":["Ruhr University of Bochum, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ruhr University of Bochum, Germany","institution_ids":["https://openalex.org/I904495901"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112842321","display_name":"Jones Yudi","orcid":null},"institutions":[{"id":"https://openalex.org/I904495901","display_name":"Ruhr University Bochum","ror":"https://ror.org/04tsk2644","country_code":"DE","type":"education","lineage":["https://openalex.org/I904495901"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jones Yudi","raw_affiliation_strings":["Ruhr University of Bochum, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ruhr University of Bochum, Germany","institution_ids":["https://openalex.org/I904495901"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114423005","display_name":"Michael Huebner","orcid":null},"institutions":[{"id":"https://openalex.org/I904495901","display_name":"Ruhr University Bochum","ror":"https://ror.org/04tsk2644","country_code":"DE","type":"education","lineage":["https://openalex.org/I904495901"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Michael Huebner","raw_affiliation_strings":["Ruhr University of Bochum, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ruhr University of Bochum, Germany","institution_ids":["https://openalex.org/I904495901"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5003242515"],"corresponding_institution_ids":["https://openalex.org/I904495901"],"apc_list":null,"apc_paid":null,"fwci":4.0365,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.95383195,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"11","issue":"1","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8550131320953369},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.634553074836731},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6077011227607727},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5914381742477417},{"id":"https://openalex.org/keywords/microblaze","display_name":"MicroBlaze","score":0.5673756003379822},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.5385739207267761},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4836767613887787},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.47495365142822266},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.4525938034057617},{"id":"https://openalex.org/keywords/single-precision-floating-point-format","display_name":"Single-precision floating-point format","score":0.4479519724845886},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3936465382575989},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.22854050993919373},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11900538206100464},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.09415483474731445}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8550131320953369},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.634553074836731},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6077011227607727},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5914381742477417},{"id":"https://openalex.org/C2777575374","wikidata":"https://www.wikidata.org/wiki/Q1644704","display_name":"MicroBlaze","level":3,"score":0.5673756003379822},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.5385739207267761},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4836767613887787},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.47495365142822266},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.4525938034057617},{"id":"https://openalex.org/C133095886","wikidata":"https://www.wikidata.org/wiki/Q1307173","display_name":"Single-precision floating-point format","level":3,"score":0.4479519724845886},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3936465382575989},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.22854050993919373},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11900538206100464},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.09415483474731445}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3173548","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3173548","pdf_url":null,"source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8999999761581421,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W799594534","https://openalex.org/W1977933385","https://openalex.org/W2037748535","https://openalex.org/W2123990034","https://openalex.org/W2296385998","https://openalex.org/W2407650548","https://openalex.org/W2602816542","https://openalex.org/W2738183326"],"related_works":["https://openalex.org/W2166107669","https://openalex.org/W2154961667","https://openalex.org/W2113902926","https://openalex.org/W2001957815","https://openalex.org/W1564887326","https://openalex.org/W2370279919","https://openalex.org/W2091734216","https://openalex.org/W2152301037","https://openalex.org/W2586397364","https://openalex.org/W2116803521"],"abstract_inverted_index":{"Using":[0],"field-programmable":[1],"gate":[2],"arrays":[3],"(FPGAs)":[4],"as":[5,118,120],"a":[6,23,81,109,145,149,163,174,218],"substrate":[7],"to":[8,136,227],"deploy":[9],"soft":[10,50,60,146],"graphics":[11],"processing":[12],"units":[13],"(GPUs)":[14],"would":[15,41],"enable":[16],"offering":[17],"the":[18,43,55,126,191,196,210],"FPGA":[19],"compute":[20,166],"power":[21,47],"in":[22,98,104,180],"very":[24],"flexible":[25],"GPU-like":[26],"tool":[27,122],"flow.":[28,123],"Application-specific":[29],"adaptations":[30],"like":[31],"selective":[32],"hardening":[33],"of":[34,49,59,187],"floating-point":[35,182,206],"operations":[36],"and":[37,46,57,70,84,95,106,156,168],"instruction":[38],"set":[39],"subsetting":[40],"mitigate":[42],"high":[44],"area":[45,155],"demands":[48],"GPUs.":[51],"This":[52,112],"work":[53],"explores":[54],"capabilities":[56],"limitations":[58],"General":[61],"Purpose":[62],"Computing":[63],"on":[64,217],"GPUs":[65],"(GPGPU)":[66],"for":[67,90,203],"both":[68],"fixed-":[69,204],"floating":[71],"point":[72],"arithmetic.":[73],"For":[74],"this":[75],"purpose,":[76],"we":[77,141,214],"have":[78],"developed":[79],"FGPU:":[80],"configurable,":[82],"scalable,":[83],"portable":[85],"GPU":[86,147],"architecture":[87,117],"designed":[88],"especially":[89],"FPGAs.":[91],"FGPU":[92,161,212],"is":[93,148],"open-source":[94],"implemented":[96],"entirely":[97],"RTL.":[99],"It":[100],"can":[101,223],"be":[102],"programmed":[103],"OpenCL":[105],"controlled":[107],"through":[108],"Python":[110],"API.":[111],"article":[113],"introduces":[114],"its":[115,121],"hardware":[116],"well":[119],"We":[124],"evaluated":[125],"proposed":[127],"GPGPU":[128],"approach":[129],"against":[130],"multiple":[131],"other":[132],"solutions.":[133],"In":[134,208],"comparison":[135],"homogeneous":[137],"Multi-Processor":[138],"System-On-Chips":[139],"(MPSoCs),":[140],"found":[142],"that":[143],"using":[144],"Pareto-optimal":[150],"solution":[151],"regarding":[152],"throughput":[153],"per":[154],"energy":[157,171],"consumption.":[158],"On":[159],"average,":[160],"has":[162,200],"2.9\u00d7":[164],"better":[165],"density":[167],"11.2\u00d7":[169],"less":[170],"consumption":[172],"than":[173],"single":[175],"MicroBlaze":[176],"processor":[177],"when":[178],"computing":[179],"IEEE-754":[181],"format.":[183],"An":[184],"average":[185],"speedup":[186],"about":[188],"4\u00d7":[189],"over":[190],"ARM":[192],"Cortex-A9":[193],"supported":[194],"with":[195,230],"NEON":[197],"vector":[198],"co-processor":[199],"been":[201],"measured":[202],"or":[205],"benchmarks.":[207],"addition,":[209],"biggest":[211],"cores":[213],"could":[215],"implement":[216],"Xilinx":[219],"Zynq-7000":[220],"System-On-Chip":[221],"(SoC)":[222],"deliver":[224],"similar":[225],"performance":[226],"equivalent":[228],"implementations":[229],"High-Level":[231],"Synthesis":[232],"(HLS).":[233]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
