{"id":"https://openalex.org/W4293100236","doi":"https://doi.org/10.1177/10943420221077107","title":"Performance portability in a real world application: PHAST applied to Caffe","display_name":"Performance portability in a real world application: PHAST applied to Caffe","publication_year":2022,"publication_date":"2022-03-21","ids":{"openalex":"https://openalex.org/W4293100236","doi":"https://doi.org/10.1177/10943420221077107"},"language":"en","primary_location":{"id":"doi:10.1177/10943420221077107","is_oa":false,"landing_page_url":"https://doi.org/10.1177/10943420221077107","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103175601","display_name":"Pablo A. Lanzarote Mart\u00ednez","orcid":"https://orcid.org/0000-0002-4391-2451"},"institutions":[{"id":"https://openalex.org/I80180929","display_name":"Universidad de Murcia","ror":"https://ror.org/03p3aeb86","country_code":"ES","type":"education","lineage":["https://openalex.org/I80180929"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Pablo Antonio Mart\u00ednez","raw_affiliation_strings":["Computer Engineering Department, University of Murcia, Murcia, Spain"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, University of Murcia, Murcia, Spain","institution_ids":["https://openalex.org/I80180929"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021283515","display_name":"Biagio Peccerillo","orcid":"https://orcid.org/0000-0002-4998-0092"},"institutions":[{"id":"https://openalex.org/I102064193","display_name":"University of Siena","ror":"https://ror.org/01tevnk56","country_code":"IT","type":"education","lineage":["https://openalex.org/I102064193"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Biagio Peccerillo","raw_affiliation_strings":["Department of Information Engineering and Mathematics, University of Siena, Siena, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Information Engineering and Mathematics, University of Siena, Siena, Italy","institution_ids":["https://openalex.org/I102064193"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047011963","display_name":"Sandro Bartolini","orcid":"https://orcid.org/0000-0002-7975-3632"},"institutions":[{"id":"https://openalex.org/I102064193","display_name":"University of Siena","ror":"https://ror.org/01tevnk56","country_code":"IT","type":"education","lineage":["https://openalex.org/I102064193"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Sandro Bartolini","raw_affiliation_strings":["Department of Information Engineering and Mathematics, University of Siena, Siena, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Information Engineering and Mathematics, University of Siena, Siena, Italy","institution_ids":["https://openalex.org/I102064193"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070286319","display_name":"Jos\u00e9 M. Garc\u0131\u0301a","orcid":"https://orcid.org/0000-0002-6388-2835"},"institutions":[{"id":"https://openalex.org/I80180929","display_name":"Universidad de Murcia","ror":"https://ror.org/03p3aeb86","country_code":"ES","type":"education","lineage":["https://openalex.org/I80180929"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jos\u00e9 M Garc\u00eda","raw_affiliation_strings":["Computer Engineering Department, University of Murcia, Murcia, Spain"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, University of Murcia, Murcia, Spain","institution_ids":["https://openalex.org/I80180929"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057772505","display_name":"Gregorio Bernab\u00e9","orcid":"https://orcid.org/0000-0002-7265-3508"},"institutions":[{"id":"https://openalex.org/I80180929","display_name":"Universidad de Murcia","ror":"https://ror.org/03p3aeb86","country_code":"ES","type":"education","lineage":["https://openalex.org/I80180929"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Gregorio Bernab\u00e9","raw_affiliation_strings":["Computer Engineering Department, University of Murcia, Murcia, Spain"],"affiliations":[{"raw_affiliation_string":"Computer Engineering Department, University of Murcia, Murcia, Spain","institution_ids":["https://openalex.org/I80180929"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5103175601"],"corresponding_institution_ids":["https://openalex.org/I80180929"],"apc_list":null,"apc_paid":null,"fwci":0.4541,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.59073143,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"36","issue":"3","first_page":"419","last_page":"439"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.8643105626106262},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7648801207542419},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5723215341567993},{"id":"https://openalex.org/keywords/mnist-database","display_name":"MNIST database","score":0.5291674137115479},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5189773440361023},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.471921443939209},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.43997058272361755},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.43250107765197754},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.418544203042984},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4069400131702423},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.32585608959198},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.13308003544807434},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10395053029060364}],"concepts":[{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.8643105626106262},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7648801207542419},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5723215341567993},{"id":"https://openalex.org/C190502265","wikidata":"https://www.wikidata.org/wiki/Q17069496","display_name":"MNIST database","level":3,"score":0.5291674137115479},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5189773440361023},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.471921443939209},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.43997058272361755},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.43250107765197754},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.418544203042984},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4069400131702423},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.32585608959198},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.13308003544807434},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10395053029060364},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1177/10943420221077107","is_oa":false,"landing_page_url":"https://doi.org/10.1177/10943420221077107","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},{"id":"pmh:oai:usiena-air.unisi.it:11365/1252534","is_oa":false,"landing_page_url":"https://journals.sagepub.com/doi/10.1177/10943420221077107","pdf_url":null,"source":{"id":"https://openalex.org/S4377196319","display_name":"Use Siena air (University of Siena)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I102064193","host_organization_name":"University of Siena","host_organization_lineage":["https://openalex.org/I102064193"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.47999998927116394}],"awards":[{"id":"https://openalex.org/G6023636391","display_name":null,"funder_award_id":"RTI2018-098156-B-C53","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"}],"funders":[{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1940012472","https://openalex.org/W1990090459","https://openalex.org/W2149381887","https://openalex.org/W2155893237","https://openalex.org/W2591122215","https://openalex.org/W2604319603","https://openalex.org/W2606722458","https://openalex.org/W2743401659","https://openalex.org/W2878447367","https://openalex.org/W2912012512","https://openalex.org/W2913954081","https://openalex.org/W2915755582","https://openalex.org/W2951894856","https://openalex.org/W2983655274","https://openalex.org/W2989312222","https://openalex.org/W3013081263","https://openalex.org/W3034516819","https://openalex.org/W3082020764","https://openalex.org/W3117961507","https://openalex.org/W3125923813","https://openalex.org/W3184015517","https://openalex.org/W4210389905","https://openalex.org/W4252274690","https://openalex.org/W6931359494","https://openalex.org/W6931424762"],"related_works":["https://openalex.org/W2950475743","https://openalex.org/W4386603768","https://openalex.org/W3102660566","https://openalex.org/W3020739840","https://openalex.org/W2983282793","https://openalex.org/W2913998709","https://openalex.org/W4386875822","https://openalex.org/W3177128669","https://openalex.org/W4388483283","https://openalex.org/W4385574943"],"abstract_inverted_index":{"This":[0],"work":[1],"covers":[2],"the":[3,16,30,45,91,102,115,122,133,155,161,168,179,182,209,230],"PHAST":[4,123,134,142,156,183,199],"Library\u2019s":[5],"employment,":[6],"a":[7,12,54,74,152,201,206,223],"hardware-agnostic":[8],"programming":[9],"library,":[10],"to":[11,34,42,52,114,121],"real-world":[13],"application":[14],"like":[15],"Caffe":[17,23,76,116,159,172],"framework.":[18],"The":[19],"original":[20,169],"implementation":[21,57,77,143,184],"of":[22,25,29,59,105,136,158,171,189,203],"consists":[24],"two":[26,110,175],"different":[27,111,176],"versions":[28],"source":[31,117],"code:":[32],"one":[33,41],"run":[35,43],"on":[36,44,62,147],"CPU":[37,63,195,216],"platforms":[38],"and":[39,64,119,149,196,205,217],"another":[40],"GPU":[46],"side.":[47],"With":[48,151],"PHAST,":[49],"we":[50,69,83,108],"aim":[51],"develop":[53],"single-source":[55],"code":[56,118,193,214],"capable":[58],"running":[60],"efficiently":[61],"GPU.":[65,197],"In":[66,100],"this":[67,106],"paper,":[68],"start":[70],"by":[71,96],"carrying":[72],"out":[73],"basic":[75],"performance":[78,86,93,131,145,166],"analysis":[79],"using":[80],"PHAST.":[81],"Then,":[82],"detail":[84],"possible":[85],"upgrades.":[87],"We":[88,138],"find":[89,109],"that":[90,140],"overall":[92],"is":[94],"dominated":[95],"few":[97],"\u2018heavy\u2019":[98],"layers.":[99],"refining":[101],"inefficient":[103],"parts":[104],"version,":[107],"approaches:":[112],"improvements":[113,120],"Library":[124],"itself,":[125],"which":[126],"ultimately":[127],"translates":[128],"into":[129],"improved":[130],"in":[132,194,215,229],"version":[135,157,170],"Caffe.":[137],"demonstrate":[139],"our":[141],"achieves":[144,200],"portability":[146],"CPUs":[148],"GPUs.":[150],"single":[153],"source,":[154],"provides":[160],"same":[162],"or":[163],"even":[164],"better":[165],"than":[167],"built":[173],"from":[174],"codebases.":[177],"For":[178],"MNIST":[180],"database,":[181],"takes":[185],"an":[186],"equivalent":[187],"amount":[188],"time":[190],"as":[191],"native":[192,213],"Furthermore,":[198],"speedup":[202],"51%":[204],"49%":[207],"with":[208],"CIFAR-10":[210],"database":[211],"against":[212],"GPU,":[218],"respectively.":[219],"These":[220],"results":[221],"provide":[222],"new":[224],"horizon":[225],"for":[226],"software":[227],"development":[228],"upcoming":[231],"heterogeneous":[232],"computing":[233],"era.":[234]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2022-08-26T00:00:00"}
