{"id":"https://openalex.org/W3008730548","doi":"https://doi.org/10.1145/3373087.3375321","title":"End-to-End Optimization of Deep Learning Applications","display_name":"End-to-End Optimization of Deep Learning Applications","publication_year":2020,"publication_date":"2020-02-23","ids":{"openalex":"https://openalex.org/W3008730548","doi":"https://doi.org/10.1145/3373087.3375321","mag":"3008730548"},"language":"en","primary_location":{"id":"doi:10.1145/3373087.3375321","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3373087.3375321","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3373087.3375321","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3373087.3375321","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039319411","display_name":"Atefeh Sohrabizadeh","orcid":"https://orcid.org/0000-0002-1156-3306"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Atefeh Sohrabizadeh","raw_affiliation_strings":["University of California, Los Angeles, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Los Angeles, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100440055","display_name":"Jie Wang","orcid":"https://orcid.org/0009-0005-4657-7977"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jie Wang","raw_affiliation_strings":["University of California, Los Angeles, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Los Angeles, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016776689","display_name":"Jason Cong","orcid":"https://orcid.org/0000-0003-2887-6963"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Cong","raw_affiliation_strings":["University of California, Los Angeles, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Los Angeles, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I161318765"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5039319411"],"corresponding_institution_ids":["https://openalex.org/I161318765"],"apc_list":null,"apc_paid":null,"fwci":3.1396,"has_fulltext":true,"cited_by_count":39,"citation_normalized_percentile":{"value":0.9315303,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"133","last_page":"139"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8362058401107788},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7328854203224182},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7212905883789062},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6742285490036011},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6507002711296082},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5922551155090332},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5157716274261475},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.480214923620224},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4606471359729767},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.44999420642852783},{"id":"https://openalex.org/keywords/handshake","display_name":"Handshake","score":0.42542946338653564},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.42175018787384033},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4163801372051239},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.41277217864990234},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4080731272697449},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4001293182373047},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3123464584350586},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.23418116569519043},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2237018346786499}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8362058401107788},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7328854203224182},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7212905883789062},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6742285490036011},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6507002711296082},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5922551155090332},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5157716274261475},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.480214923620224},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4606471359729767},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.44999420642852783},{"id":"https://openalex.org/C2778000800","wikidata":"https://www.wikidata.org/wiki/Q830043","display_name":"Handshake","level":3,"score":0.42542946338653564},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.42175018787384033},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4163801372051239},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.41277217864990234},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4080731272697449},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4001293182373047},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3123464584350586},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.23418116569519043},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2237018346786499},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3373087.3375321","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3373087.3375321","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3373087.3375321","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3373087.3375321","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3373087.3375321","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3373087.3375321","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G285687498","display_name":null,"funder_award_id":"CNS-1719403","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4587427570","display_name":null,"funder_award_id":"EP/S030069/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5212922409","display_name":null,"funder_award_id":"1719403","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5359326208","display_name":null,"funder_award_id":"DBI-1707408, CNS 1719403","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5432741810","display_name":null,"funder_award_id":"1707408","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7143216947","display_name":null,"funder_award_id":"DBI-1707408","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3008730548.pdf","grobid_xml":"https://content.openalex.org/works/W3008730548.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W1549358575","https://openalex.org/W1686810756","https://openalex.org/W2094756095","https://openalex.org/W2294282016","https://openalex.org/W2402144811","https://openalex.org/W2516097068","https://openalex.org/W2525740295","https://openalex.org/W2540189295","https://openalex.org/W2559085405","https://openalex.org/W2612445135","https://openalex.org/W2625954420","https://openalex.org/W2727238169","https://openalex.org/W2884193496","https://openalex.org/W2886080480","https://openalex.org/W2887936511","https://openalex.org/W2895432151","https://openalex.org/W2899644485","https://openalex.org/W2899691047","https://openalex.org/W2899915146","https://openalex.org/W2900082550","https://openalex.org/W2950395921","https://openalex.org/W2963163009","https://openalex.org/W2997828269","https://openalex.org/W4301091646","https://openalex.org/W4387007199"],"related_works":["https://openalex.org/W2358991869","https://openalex.org/W4285173741","https://openalex.org/W2309292492","https://openalex.org/W2735105689","https://openalex.org/W1482833264","https://openalex.org/W1486050759","https://openalex.org/W2106545930","https://openalex.org/W1981032420","https://openalex.org/W3207859108","https://openalex.org/W2058965144"],"abstract_inverted_index":{"The":[0,185],"irregularity":[1],"of":[2,94,116,147,202,210],"recent":[3],"Convolutional":[4],"Neural":[5],"Network":[6],"(CNN)":[7],"models":[8],"such":[9],"as":[10,166],"less":[11],"data":[12,125],"reuse":[13],"and":[14,22,102,124,150,204],"parallelism":[15],"due":[16],"to":[17,71,73,89,190],"the":[18,79,91,133,144,175,196,219,229],"extensive":[19],"network":[20],"pruning":[21],"simplification":[23],"creates":[24],"new":[25],"challenges":[26,80],"for":[27,113,162,212,223],"FPGA":[28,65,227],"acceleration.":[29],"Furthermore,":[30],"without":[31],"proper":[32],"optimization,":[33],"there":[34],"could":[35,69],"be":[36],"significant":[37],"overheads":[38,146],"when":[39],"integrating":[40],"FPGAs":[41],"into":[42,67,132],"existing":[43],"machine":[44],"learning":[45,96],"frameworks":[46],"like":[47],"TensorFlow.":[48],"Such":[49],"a":[50,63,100,137,158,167,191,200],"problem":[51],"is":[52,129,218],"mostly":[53],"overlooked":[54],"by":[55],"previous":[56],"studies.":[57],"However,":[58],"our":[59],"study":[60],"shows":[61],"that":[62,173],"naive":[64],"integration":[66,140,187],"TensorFlow":[68,134],"lead":[70],"up":[72],"8.45x":[74],"performance":[75,183,209,221],"degradation.":[76],"To":[77],"address":[78],"mentioned":[81],"above,":[82],"we":[83,179],"propose":[84],"several":[85],"SW/HW":[86,197],"co-design":[87],"approaches":[88],"perform":[90],"end-to-end":[92,208],"optimization":[93],"deep":[95],"applications.":[97],"We":[98,155],"present":[99],"flexible":[101],"composable":[103],"architecture":[104,177],"called":[105],"FlexCNN.":[106],"It":[107],"can":[108,180],"deliver":[109],"high":[110,145],"computation":[111],"efficiency":[112],"different":[114],"types":[115],"convolution":[117],"layers":[118],"using":[119],"techniques":[120],"including":[121],"dynamic":[122],"tiling":[123],"layout":[126],"optimization.":[127],"FlexCNN":[128,176],"further":[130,192],"integrated":[131],"framework":[135],"with":[136,174,214],"fully-pipelined":[138],"software-hardware":[139],"flow.":[141],"This":[142],"alleviates":[143],"TensorFlow-FPGA":[148],"handshake":[149],"other":[151],"non-CNN":[152],"processing":[153],"stages.":[154],"use":[156],"OpenPose,":[157],"popular":[159],"CNN-based":[160],"application":[161,225],"human":[163],"pose":[164],"recognition,":[165],"case":[168],"study.":[169],"Experimental":[170],"results":[171,205],"show":[172],"optimizations,":[178],"achieve":[181],"2.3x":[182],"improvement.":[184],"pipelined":[186],"stack":[188],"leads":[189],"5x":[193],"speedup.":[194],"Overall,":[195],"co-optimization":[198],"produces":[199],"speedup":[201],"11.5x":[203],"in":[206,228],"an":[207],"23.8FPS":[211],"OpenPose":[213],"floating-point":[215],"precision,":[216],"which":[217],"highest":[220],"reported":[222],"this":[224],"on":[226],"literature.":[230]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":14},{"year":2021,"cited_by_count":11}],"updated_date":"2026-03-28T08:17:26.163206","created_date":"2025-10-10T00:00:00"}
