{"id":"https://openalex.org/W2731391612","doi":"https://doi.org/10.1145/3079757","title":"Reducing the Performance Gap between Soft Scalar CPUs and Custom Hardware with TILT","display_name":"Reducing the Performance Gap between Soft Scalar CPUs and Custom Hardware with TILT","publication_year":2017,"publication_date":"2017-06-27","ids":{"openalex":"https://openalex.org/W2731391612","doi":"https://doi.org/10.1145/3079757","mag":"2731391612"},"language":"en","primary_location":{"id":"doi:10.1145/3079757","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3079757","pdf_url":null,"source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041948391","display_name":"Ilian Tili","orcid":null},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Ilian Tili","raw_affiliation_strings":["University of Toronto, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038297462","display_name":"Kalin Ovtcharov","orcid":null},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Kalin Ovtcharov","raw_affiliation_strings":["University of Toronto, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110755517","display_name":"J. Gregory Steffan","orcid":null},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"J. Gregory Steffan","raw_affiliation_strings":["University of Toronto, Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"University of Toronto, Ontario, Canada","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5041948391"],"corresponding_institution_ids":["https://openalex.org/I185261750"],"apc_list":null,"apc_paid":null,"fwci":0.2253,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.48529587,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"10","issue":"3","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/datapath","display_name":"Datapath","score":0.909076452255249},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7963380813598633},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.7571408748626709},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5551971197128296},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.533982515335083},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4878251850605011},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.4315100312232971},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.41302716732025146}],"concepts":[{"id":"https://openalex.org/C2781198647","wikidata":"https://www.wikidata.org/wiki/Q1633673","display_name":"Datapath","level":2,"score":0.909076452255249},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7963380813598633},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7571408748626709},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5551971197128296},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.533982515335083},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4878251850605011},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4315100312232971},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.41302716732025146},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3079757","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3079757","pdf_url":null,"source":{"id":"https://openalex.org/S112809824","display_name":"ACM Transactions on Reconfigurable Technology and Systems","issn_l":"1936-7406","issn":["1936-7406","1936-7414"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Reconfigurable Technology and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.4099999964237213}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W105298322","https://openalex.org/W286049110","https://openalex.org/W1502361557","https://openalex.org/W1506585561","https://openalex.org/W1590358180","https://openalex.org/W1964784760","https://openalex.org/W1985940938","https://openalex.org/W1998922872","https://openalex.org/W1999923180","https://openalex.org/W2016889342","https://openalex.org/W2024080903","https://openalex.org/W2025787141","https://openalex.org/W2027077493","https://openalex.org/W2054388295","https://openalex.org/W2076323772","https://openalex.org/W2077791698","https://openalex.org/W2085189212","https://openalex.org/W2111683449","https://openalex.org/W2114870379","https://openalex.org/W2117041600","https://openalex.org/W2121076909","https://openalex.org/W2129183345","https://openalex.org/W2129565950","https://openalex.org/W2130094715","https://openalex.org/W2139836711","https://openalex.org/W2151731729","https://openalex.org/W2155946985","https://openalex.org/W2157758640","https://openalex.org/W2602816542","https://openalex.org/W4232919122"],"related_works":["https://openalex.org/W2109699519","https://openalex.org/W970262775","https://openalex.org/W1831618318","https://openalex.org/W2907761035","https://openalex.org/W2006568360","https://openalex.org/W4244724753","https://openalex.org/W2059591361","https://openalex.org/W2535673728","https://openalex.org/W1972081536","https://openalex.org/W102726818"],"abstract_inverted_index":{"By":[0],"using":[1],"resource":[2],"sharing":[3],"field-programmable":[4],"gate":[5],"array":[6],"(FPGA)":[7],"compute":[8,41],"engines,":[9],"we":[10,78,200,208],"can":[11,53,94,124,162,178],"reduce":[12,163,179],"the":[13,56,96,113,120,132,146,155,164,180,195,211],"performance":[14,97],"gap":[15,165],"between":[16],"soft":[17,66,100],"scalar":[18,67,101],"CPUs":[19],"and":[20,30,141,189,207],"resource-intensive":[21],"custom":[22,114,159],"datapath":[23,160],"designs.":[24],"This":[25],"article":[26],"demonstrates":[27],"that":[28,152],"Thread-":[29],"Instruction-Level":[31],"parallel":[32],"Template":[33],"architecture":[34],"(TILT),":[35],"a":[36,65,71,82,87,99],"programmable":[37],"FPGA-based":[38],"horizontally":[39],"microcoded":[40],"engine":[42],"designed":[43],"to":[44,64,70,137,166,182,191],"highly":[45],"utilize":[46],"floating":[47],"point":[48],"(FP)":[49],"functional":[50],"units":[51],"(FUs),":[52],"improve":[54,95],"significantly":[55],"average":[57,110,129,184],"throughput":[58],"of":[59,112,122,145,157,173,176,185,218],"eight":[60,75,219],"FP-intensive":[61],"applications":[62],"compared":[63],"CPU":[68,102],"(similar":[69],"FP-extended":[72],"Nios).":[73],"For":[74],"benchmark":[76],"applications,":[77],"show":[79],"that:":[80],"(i)":[81],"base":[83],"TILT":[84,150,177],"configuration":[85,151],"having":[86],"single":[88],"instance":[89],"for":[90,194,203,215],"each":[91,158],"FU":[92],"type":[93],"over":[98],"by":[103],"15.8":[104],"\u00d7":[105,136,139,168,187,193],",":[106,169,188],"while":[107,170],"requiring":[108],"on":[109],"26%":[111],"datapaths\u2019":[115],"area;":[116],"(ii)":[117],"selectively":[118],"increasing":[119],"number":[121],"FUs":[123],"more":[125],"than":[126],"double":[127],"TILT\u2019s":[128],"throughput,":[130],"reducing":[131],"custom-datapath-throughput-gap":[133,181],"from":[134],"576":[135],"14":[138],";":[140],"(iii)":[142],"replicated":[143,171],"instances":[144,172],"most":[147],"computationally":[148,212],"dense":[149],"fit":[153],"within":[154],"area":[156],"design":[161,204,214],"8.27":[167],"application-tuned":[174],"configurations":[175],"an":[183],"5.22":[186],"up":[190],"3.41":[192],"Matrix":[196],"Multiply":[197],"benchmark.":[198],"Last,":[199],"present":[201],"methods":[202],"space":[205],"reduction,":[206],"correctly":[209],"predict":[210],"densest":[213],"seven":[216],"out":[217],"benchmarks.":[220]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
