{"id":"https://openalex.org/W3195824329","doi":"https://doi.org/10.1109/access.2021.3101936","title":"Blackthorn: Latency Estimation Framework for CNNs on Embedded Nvidia Platforms","display_name":"Blackthorn: Latency Estimation Framework for CNNs on Embedded Nvidia Platforms","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3195824329","doi":"https://doi.org/10.1109/access.2021.3101936","mag":"3195824329"},"language":"en","primary_location":{"id":"doi:10.1109/access.2021.3101936","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3101936","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09503415.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09503415.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087961950","display_name":"Martin Lechner","orcid":"https://orcid.org/0000-0003-1083-0246"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Martin Lechner","raw_affiliation_strings":["Institute of Computer Technology, TU Wien, Vienna, Austria","Christian Doppler Laboratory for Embedded Machine Learning, Institute of Computer Technology, TU Wien, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"Institute of Computer Technology, TU Wien, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]},{"raw_affiliation_string":"Christian Doppler Laboratory for Embedded Machine Learning, Institute of Computer Technology, TU Wien, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032163732","display_name":"Axel Jantsch","orcid":"https://orcid.org/0000-0003-2251-0004"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Axel Jantsch","raw_affiliation_strings":["Institute of Computer Technology, TU Wien, Vienna, Austria","Christian Doppler Laboratory for Embedded Machine Learning, Institute of Computer Technology, TU Wien, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"Institute of Computer Technology, TU Wien, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]},{"raw_affiliation_string":"Christian Doppler Laboratory for Embedded Machine Learning, Institute of Computer Technology, TU Wien, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5087961950"],"corresponding_institution_ids":["https://openalex.org/I145847075"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1832,"currency":"EUR","value_usd":1975},"fwci":0.9607,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.77495098,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"9","issue":null,"first_page":"110074","last_page":"110084"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.83335280418396},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.7447199821472168},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5949918627738953},{"id":"https://openalex.org/keywords/mean-squared-error","display_name":"Mean squared error","score":0.542521059513092},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.5218526124954224},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.47500526905059814},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.4682183265686035},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.45378822088241577},{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.42161762714385986},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.38348934054374695},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3522670567035675},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.348785936832428},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3215731978416443},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09543642401695251}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83335280418396},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.7447199821472168},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5949918627738953},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.542521059513092},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.5218526124954224},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.47500526905059814},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.4682183265686035},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.45378822088241577},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.42161762714385986},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.38348934054374695},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3522670567035675},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.348785936832428},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3215731978416443},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09543642401695251},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2021.3101936","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3101936","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09503415.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:df5e7bce502d4486b9566620b01c45b2","is_oa":true,"landing_page_url":"https://doaj.org/article/df5e7bce502d4486b9566620b01c45b2","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 9, Pp 110074-110084 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2021.3101936","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3101936","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09503415.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4099999964237213,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320311813","display_name":"\u00d6sterreichische Nationalstiftung f\u00fcr Forschung, Technologie und Entwicklung","ror":"https://ror.org/04hb33h70"},{"id":"https://openalex.org/F4320322839","display_name":"Technische Universit\u00e4t Wien","ror":"https://ror.org/04d836q62"},{"id":"https://openalex.org/F4320323591","display_name":"Christian Doppler Forschungsgesellschaft","ror":"https://ror.org/00mv8h305"},{"id":"https://openalex.org/F4320327593","display_name":"Bundesministerium f\u00fcr Digitalisierung und Wirtschaftsstandort","ror":null},{"id":"https://openalex.org/F4320335937","display_name":"Technische Universit\u00e4t Wien Bibliothek","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3195824329.pdf","grobid_xml":"https://content.openalex.org/works/W3195824329.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1686810756","https://openalex.org/W2005660297","https://openalex.org/W2097117768","https://openalex.org/W2155893237","https://openalex.org/W2530484110","https://openalex.org/W2583832915","https://openalex.org/W2613718673","https://openalex.org/W2618530766","https://openalex.org/W2752512710","https://openalex.org/W2766975872","https://openalex.org/W2799197246","https://openalex.org/W2912238674","https://openalex.org/W2945856475","https://openalex.org/W2954826145","https://openalex.org/W2962835968","https://openalex.org/W2963163009","https://openalex.org/W2963542991","https://openalex.org/W2963633606","https://openalex.org/W2963911037","https://openalex.org/W2966048283","https://openalex.org/W2975386002","https://openalex.org/W2990781015","https://openalex.org/W2995763407","https://openalex.org/W3015497938","https://openalex.org/W3033428961","https://openalex.org/W3037110041","https://openalex.org/W3043571714","https://openalex.org/W3098316065","https://openalex.org/W3100741579","https://openalex.org/W3105131457","https://openalex.org/W3115175310","https://openalex.org/W6629368666","https://openalex.org/W6637373629","https://openalex.org/W6638444622","https://openalex.org/W6744307745","https://openalex.org/W6745690570","https://openalex.org/W6769454693","https://openalex.org/W6773944657"],"related_works":["https://openalex.org/W2102148524","https://openalex.org/W2314720829","https://openalex.org/W4385074335","https://openalex.org/W2626189183","https://openalex.org/W2307385607","https://openalex.org/W2626268514","https://openalex.org/W324331621","https://openalex.org/W4226285110","https://openalex.org/W2973113505","https://openalex.org/W2049261842"],"abstract_inverted_index":{"With":[0],"more":[1,43],"powerful":[2],"yet":[3],"efficient":[4],"embedded":[5,83],"devices":[6,31,141],"and":[7,61,68,102,118,138,151],"accelerators":[8],"being":[9],"available":[10],"for":[11,37,48,82,94,170],"Deep":[12],"Neural":[13],"Networks":[14],"(DNN),":[15],"machine":[16],"learning":[17],"is":[18,51,167],"becoming":[19],"an":[20],"integral":[21],"part":[22],"of":[23,29,58,106,122,147],"edge":[24],"computing.":[25],"As":[26],"the":[27,34,54,104,163,171],"number":[28],"such":[30],"increases,":[32],"finding":[33],"best":[35],"platform":[36],"a":[38,59,62,77,107,143],"specific":[39],"application":[40,49],"has":[41],"become":[42],"challenging.":[44],"A":[45],"common":[46],"question":[47],"developers":[50,98],"to":[52,99,109,127],"find":[53,100],"most":[55],"cost-effective":[56],"combination":[57],"DNN":[60,108],"device":[63],"while":[64],"still":[65],"meeting":[66],"latency":[67,79,165],"accuracy":[69],"requirements.":[70],"In":[71],"this":[72],"work,":[73],"we":[74],"propose":[75],"Blackthorn,":[76],"layer-wise":[78],"estimation":[80,145],"framework":[81,114],"Nvidia":[84],"GPUs":[85],"based":[86],"on":[87,135],"analytical":[88],"models.":[89],"We":[90],"provide":[91],"accurate":[92],"predictions":[93],"each":[95],"layer,":[96],"helping":[97],"bottlenecks":[101],"optimize":[103],"architecture":[105],"fit":[110],"target":[111],"platforms.":[112],"Our":[113,132],"can":[115],"quickly":[116],"evaluate":[117],"compare":[119],"large":[120],"amounts":[121],"network":[123,161],"optimizations":[124],"without":[125],"needing":[126],"build":[128],"time-consuming":[129],"execution":[130],"engines.":[131],"experimental":[133],"results":[134],"Jetson":[136,139],"TX2":[137],"Nano":[140],"show":[142],"per-layer":[144],"error":[146,166],"6.104%":[148],"Root-Mean-Square-Percentage-Error":[149],"(RMSPE)":[150],"5.888%":[152],"RMSPE,":[153],"which":[154],"significantly":[155],"outperforms":[156],"current":[157],"state-of-the-art":[158],"methods.":[159],"At":[160],"level,":[162],"average":[164],"below":[168],"3%":[169],"tested":[172],"DNNs.":[173]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
