{"id":"https://openalex.org/W2903298089","doi":"https://doi.org/10.1109/hpec.2018.8547521","title":"Application Aware Tuning of Reconfigurable Multi-Layer Perceptron Architectures","display_name":"Application Aware Tuning of Reconfigurable Multi-Layer Perceptron Architectures","publication_year":2018,"publication_date":"2018-09-01","ids":{"openalex":"https://openalex.org/W2903298089","doi":"https://doi.org/10.1109/hpec.2018.8547521","mag":"2903298089"},"language":"en","primary_location":{"id":"doi:10.1109/hpec.2018.8547521","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2018.8547521","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE High Performance extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034744458","display_name":"Ahmed Sanaullah","orcid":"https://orcid.org/0000-0002-7169-480X"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ahmed Sanaullah","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Boston University, Boston, MA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Boston University, Boston, MA","institution_ids":["https://openalex.org/I111088046"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103906926","display_name":"Chen Yang","orcid":"https://orcid.org/0000-0001-5228-3426"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chen Yang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Boston University, Boston, MA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Boston University, Boston, MA","institution_ids":["https://openalex.org/I111088046"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007973964","display_name":"Yuri Alexeev","orcid":"https://orcid.org/0000-0001-5066-2254"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuri Alexeev","raw_affiliation_strings":["Argonne National Laboratory, IL"],"affiliations":[{"raw_affiliation_string":"Argonne National Laboratory, IL","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032371884","display_name":"Kazutomo Yoshii","orcid":"https://orcid.org/0000-0003-1904-5383"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kazutomo Yoshii","raw_affiliation_strings":["Argonne National Laboratory, IL"],"affiliations":[{"raw_affiliation_string":"Argonne National Laboratory, IL","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021051610","display_name":"Martin Herbordt","orcid":"https://orcid.org/0000-0002-3443-9113"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Martin C. Herbordt","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Boston University, Boston, MA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Boston University, Boston, MA","institution_ids":["https://openalex.org/I111088046"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5034744458"],"corresponding_institution_ids":["https://openalex.org/I111088046"],"apc_list":null,"apc_paid":null,"fwci":0.3863,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.64415445,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"in press","issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8539108037948608},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6554514169692993},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5619058609008789},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5510160326957703},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.508796751499176},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.48862963914871216},{"id":"https://openalex.org/keywords/critical-path-method","display_name":"Critical path method","score":0.4732435643672943},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4392157196998596},{"id":"https://openalex.org/keywords/multilayer-perceptron","display_name":"Multilayer perceptron","score":0.43367907404899597},{"id":"https://openalex.org/keywords/datapath","display_name":"Datapath","score":0.4334333837032318},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3588617444038391},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3401796817779541},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.32917582988739014},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1739814281463623}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8539108037948608},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6554514169692993},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5619058609008789},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5510160326957703},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.508796751499176},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.48862963914871216},{"id":"https://openalex.org/C115874739","wikidata":"https://www.wikidata.org/wiki/Q825377","display_name":"Critical path method","level":2,"score":0.4732435643672943},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4392157196998596},{"id":"https://openalex.org/C179717631","wikidata":"https://www.wikidata.org/wiki/Q2991667","display_name":"Multilayer perceptron","level":3,"score":0.43367907404899597},{"id":"https://openalex.org/C2781198647","wikidata":"https://www.wikidata.org/wiki/Q1633673","display_name":"Datapath","level":2,"score":0.4334333837032318},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3588617444038391},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3401796817779541},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.32917582988739014},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1739814281463623},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec.2018.8547521","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2018.8547521","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE High Performance extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W293832754","https://openalex.org/W1628266928","https://openalex.org/W1999563089","https://openalex.org/W2097293837","https://openalex.org/W2110933408","https://openalex.org/W2146858735","https://openalex.org/W2271840356","https://openalex.org/W2562773490","https://openalex.org/W2606722458","https://openalex.org/W2610206238","https://openalex.org/W2890068895","https://openalex.org/W2890636291","https://openalex.org/W2894590528","https://openalex.org/W2902469261","https://openalex.org/W2906549808","https://openalex.org/W4242577057","https://openalex.org/W6736910227","https://openalex.org/W6756364914","https://openalex.org/W6757565226"],"related_works":["https://openalex.org/W2109699519","https://openalex.org/W2006568360","https://openalex.org/W102726818","https://openalex.org/W4233616027","https://openalex.org/W2059591361","https://openalex.org/W970262775","https://openalex.org/W2058965144","https://openalex.org/W4244724753","https://openalex.org/W1580755070","https://openalex.org/W1968235461"],"abstract_inverted_index":{"Production":[0],"FPGA":[1,202],"implementations":[2],"of":[3,38,44,49,63,72,84,208],"Multi-Layer":[4],"Perceptron":[5],"(MLP)":[6],"inference":[7,132,197],"typically":[8],"address":[9,17,30],"the":[10,34,60,89,98,125,142,189,200,211],"growing":[11],"performance":[12],"demands":[13],"by,":[14],"(i)":[15],"to":[16,29,59,102,123,153,169],"memory":[18],"boundedness,":[19,32],"storing":[20],"neuron":[21],"weights":[22],"on-chip,":[23],"e.g.,":[24],"Microsoft":[25],"Brainwave,":[26],"and,":[27],"(ii)":[28],"compute":[31],"generating":[33],"largest":[35],"possible":[36],"arrays":[37],"multipliers":[39],"and":[40,82,145,157,192,203],"accumulators.":[41],"These":[42],"approaches":[43],"maximizing":[45],"device":[46],"utilization,":[47],"irrespective":[48],"application":[50],"model,":[51],"can":[52,76,95,181],"actually":[53],"result":[54,96],"in":[55,80,88,97,141],"higher":[56],"latencies":[57],"due":[58],"tight":[61],"coupling":[62],"different":[64],"function":[65],"modules.":[66],"For":[67,188],"example,":[68],"generic":[69],"parameter":[70,179],"sizing":[71,180],"a":[73,104,113],"given":[74],"component":[75],"force":[77],"an":[78,205],"increase":[79],"complexity":[81],"latency":[83,183],"multiple":[85],"other":[86],"modules":[87,140],"design.":[90],"In":[91,106,161],"real-time":[92],"applications,":[93],"this":[94,151],"overall":[99],"computation":[100],"failing":[101],"make":[103],"deadline.":[105],"our":[107],"work":[108],"we":[109,120,163,195],"begin":[110],"by":[111,138,184],"creating":[112],"testbed":[114],"for":[115,129],"low-latency":[116],"MLP":[117,131],"inference,":[118],"which":[119],"then":[121,149],"use":[122,150],"explore":[124],"application-aware":[126],"optimization":[127,135],"space":[128],"compute-bound":[130],"engines.":[133],"The":[134],"process":[136],"begins":[137],"identifying":[139],"critical":[143],"path":[144],"their":[146,158],"connectivity.":[147],"We":[148,175],"information":[152],"determine":[154],"key":[155],"parameters":[156],"ideal":[159],"values.":[160],"addition,":[162],"automate":[164],"hardware":[165],"generation":[166],"using":[167,199],"OpenCL":[168],"ensure":[170],"standard":[171],"optimizations":[172],"are":[173],"applied.":[174],"find":[176],"that":[177],"correct":[178],"reduce":[182],"20%":[185],"on":[186],"average.":[187],"MNIST,":[190],"Poker,":[191],"ECP-Candle":[193],"benchmarks,":[194],"implement":[196],"models":[198],"Arria10X115":[201],"achieve":[204],"average":[206],"speedup":[207],"1.47x":[209],"over":[210],"NVIDIA":[212],"Tesla":[213],"P100":[214],"GPU.":[215]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
