{"id":"https://openalex.org/W4390188190","doi":"https://doi.org/10.1109/hpec58863.2023.10363464","title":"High-Level Frameworks: Effect on Transformer Inference Time and Power on Embedded GPU Devices","display_name":"High-Level Frameworks: Effect on Transformer Inference Time and Power on Embedded GPU Devices","publication_year":2023,"publication_date":"2023-09-25","ids":{"openalex":"https://openalex.org/W4390188190","doi":"https://doi.org/10.1109/hpec58863.2023.10363464"},"language":"en","primary_location":{"id":"doi:10.1109/hpec58863.2023.10363464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec58863.2023.10363464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102920725","display_name":"Marika E. Schubert","orcid":"https://orcid.org/0000-0002-9270-8502"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Marika E. Schubert","raw_affiliation_strings":["University of Pittsburgh,Department Electrical and Computer Engineering,Pittsburgh,PA,USA","Department Electrical and Computer Engineering, University of Pittsburgh, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh,Department Electrical and Computer Engineering,Pittsburgh,PA,USA","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"Department Electrical and Computer Engineering, University of Pittsburgh, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042589258","display_name":"David Langerman","orcid":"https://orcid.org/0000-0001-8777-4655"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David Langerman","raw_affiliation_strings":["NSF Center for Space, High-Performance, and Resilient Computing,Pittsburgh,PA,USA","NSF Center for Space, High-Performance, and Resilient Computing, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"NSF Center for Space, High-Performance, and Resilient Computing,Pittsburgh,PA,USA","institution_ids":[]},{"raw_affiliation_string":"NSF Center for Space, High-Performance, and Resilient Computing, Pittsburgh, PA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082898376","display_name":"Alan D. George","orcid":"https://orcid.org/0000-0001-9665-2879"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alan D. George","raw_affiliation_strings":["University of Pittsburgh,Department Electrical and Computer Engineering,Pittsburgh,PA,USA","Department Electrical and Computer Engineering, University of Pittsburgh, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh,Department Electrical and Computer Engineering,Pittsburgh,PA,USA","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"Department Electrical and Computer Engineering, University of Pittsburgh, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I170201317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102920725"],"corresponding_institution_ids":["https://openalex.org/I170201317"],"apc_list":null,"apc_paid":null,"fwci":0.3076,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.54491206,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8299903869628906},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6349760293960571},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.5730307698249817},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.46244436502456665},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4620780944824219},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4604523777961731},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.43351513147354126},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.42115747928619385},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.4130484461784363},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3832402229309082},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3457666337490082},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3439560830593109},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.31901177763938904}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8299903869628906},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6349760293960571},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.5730307698249817},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.46244436502456665},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4620780944824219},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4604523777961731},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.43351513147354126},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.42115747928619385},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.4130484461784363},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3832402229309082},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3457666337490082},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3439560830593109},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.31901177763938904},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec58863.2023.10363464","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec58863.2023.10363464","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7699999809265137,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G1214786280","display_name":null,"funder_award_id":"CNS-1738783","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1922655562","https://openalex.org/W2773756426","https://openalex.org/W2804032941","https://openalex.org/W2933138175","https://openalex.org/W2965373594","https://openalex.org/W3036601975","https://openalex.org/W3093579165","https://openalex.org/W3141239769","https://openalex.org/W3209059054","https://openalex.org/W4205185581","https://openalex.org/W4205779689","https://openalex.org/W4212774754","https://openalex.org/W4221145109","https://openalex.org/W4226033575","https://openalex.org/W4385245566","https://openalex.org/W4385572245","https://openalex.org/W6687566353","https://openalex.org/W6739901393","https://openalex.org/W6761176036","https://openalex.org/W6769196770","https://openalex.org/W6769243733","https://openalex.org/W6795952400","https://openalex.org/W6806872647","https://openalex.org/W7027429494"],"related_works":["https://openalex.org/W3020739840","https://openalex.org/W2983282793","https://openalex.org/W2913998709","https://openalex.org/W4386875822","https://openalex.org/W3177128669","https://openalex.org/W4388483283","https://openalex.org/W4385574943","https://openalex.org/W2111416043","https://openalex.org/W4243399827","https://openalex.org/W4236300446"],"abstract_inverted_index":{"Developing":[0],"software":[1],"for":[2,224],"machine-":[3],"and":[4,7,55,70,81,130,143,156,162,172,192],"deep-learning":[5],"(ML":[6],"DL)":[8],"workloads":[9],"is":[10],"often":[11],"a":[12,35,138,210],"daunting":[13],"task":[14],"to":[15,22,39,121,127,219],"individuals":[16],"with":[17,24,44],"minimal":[18],"programming":[19],"experience":[20],"or":[21],"organizations":[23],"limited":[25],"engineering":[26,46],"capacity.":[27],"ML":[28,103],"frameworks":[29,104,160,218],"address":[30],"these":[31,101,185],"issues":[32],"by":[33,64],"providing":[34,91],"high-level":[36,76,102,217],"API":[37],"able":[38],"perform":[40],"otherwise":[41],"complex":[42],"tasks":[43],"less":[45],"time.":[47],"This":[48,75,97],"high":[49],"level":[50],"of":[51,58,124],"abstraction":[52],"can":[53,82],"reduce":[54],"hide":[56],"many":[57],"the":[59,149,153,157,178,198,203,216,225],"challenges":[60],"that":[61,100,168],"are":[62,105,174],"induced":[63],"unclean":[65],"datasets,":[66],"complicated":[67],"pre/postprocessing":[68],"pipelines,":[69],"low-level":[71],"dependencies":[72],"like":[73],"CUDA.":[74],"approach":[77],"encourages":[78],"model":[79,92],"portability":[80],"dramatically":[83],"increase":[84],"design":[85],"iteration":[86],"speed,":[87],"as":[88,90],"well":[89],"speedup":[93],"in":[94],"some":[95],"cases.":[96],"research":[98],"demonstrates":[99],"also":[106,214],"more":[107,221],"performant":[108],"out-of-the-box":[109],"on":[110,202],"embedded":[111,204],"systems":[112],"than":[113,177,197],"their":[114,122],"pure":[115],"PyTorch":[116,181,200],"reference":[117,150],"implementations":[118],"likely":[119],"due":[120],"myriad":[123],"optimizations":[125],"related":[126],"data":[128],"movement":[129],"memory":[131],"management.":[132],"In":[133,183],"this":[134],"research,":[135],"we":[136,166,213],"benchmark":[137],"state-of-the-art":[139],"transcription":[140],"model,":[141],"wav2vec2,":[142],"compare":[144],"performance":[145],"across":[146],"different":[147],"frameworks:":[148],"implementation":[151,201],"from":[152],"Fairseq":[154],"framework":[155],"two":[158],"higher-level":[159],"HuggingFace":[161,173],"Lightning":[163,170],"Flash.":[164],"Overall,":[165],"observe":[167,215],"both":[169],"Flash":[171],"substantially":[175],"faster":[176,196],"original":[179],"unoptimized":[180],"model.":[182],"general,":[184],"models":[186],"ran":[187],"between":[188],"<tex":[189,193],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[190,194],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.8\\times$</tex>":[191],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$2.0\\times$</tex>":[195],"base":[199],"NVIDIA":[205],"Jetson":[206],"platforms":[207],"targeted.":[208],"As":[209],"secondary":[211],"result,":[212],"be":[220],"power":[222],"efficient":[223],"same":[226],"computation.":[227]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
