{"id":"https://openalex.org/W3157947391","doi":"https://doi.org/10.1145/3456669.3456678","title":"Profiling Heterogeneous Computing Performance with VTune Profiler","display_name":"Profiling Heterogeneous Computing Performance with VTune Profiler","publication_year":2021,"publication_date":"2021-04-27","ids":{"openalex":"https://openalex.org/W3157947391","doi":"https://doi.org/10.1145/3456669.3456678","mag":"3157947391"},"language":"en","primary_location":{"id":"doi:10.1145/3456669.3456678","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3456669.3456678","pdf_url":null,"source":{"id":"https://openalex.org/S4306420323","display_name":"International Workshop on OpenCL","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Workshop on OpenCL","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072792931","display_name":"Vladimir Tsymbal","orcid":"https://orcid.org/0000-0003-0265-2232"},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Vladimir Tsymbal","raw_affiliation_strings":["Intel, DE"],"affiliations":[{"raw_affiliation_string":"Intel, DE","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072485431","display_name":"Alexandr Kurylev","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexandr Kurylev","raw_affiliation_strings":["Intel, RU"],"affiliations":[{"raw_affiliation_string":"Intel, RU","institution_ids":["https://openalex.org/I1343180700"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5072792931"],"corresponding_institution_ids":["https://openalex.org/I1343180700"],"apc_list":null,"apc_paid":null,"fwci":0.308,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.31948246,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8962472081184387},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.6715080142021179},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5439415574073792},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.4977107346057892},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.46224239468574524},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.4418802261352539},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.43193304538726807},{"id":"https://openalex.org/keywords/computer-performance","display_name":"Computer performance","score":0.4198223948478699},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.40891119837760925},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3673356771469116},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3584635853767395}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8962472081184387},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.6715080142021179},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5439415574073792},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.4977107346057892},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.46224239468574524},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.4418802261352539},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.43193304538726807},{"id":"https://openalex.org/C187123476","wikidata":"https://www.wikidata.org/wiki/Q1197550","display_name":"Computer performance","level":2,"score":0.4198223948478699},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.40891119837760925},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3673356771469116},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3584635853767395},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3456669.3456678","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3456669.3456678","pdf_url":null,"source":{"id":"https://openalex.org/S4306420323","display_name":"International Workshop on OpenCL","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Workshop on OpenCL","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W2084856301","https://openalex.org/W2533043572","https://openalex.org/W2783439599","https://openalex.org/W3124194311","https://openalex.org/W2943815518","https://openalex.org/W3192695480","https://openalex.org/W1966309703","https://openalex.org/W2883934877","https://openalex.org/W1566778071"],"abstract_inverted_index":{"Programming":[0],"of":[1,7,70,112,174,204],"heterogeneous":[2,175],"platforms":[3],"requires":[4],"deep":[5],"understanding":[6],"system":[8],"architecture":[9],"on":[10,192,207,284,292],"all":[11],"levels,":[12],"which":[13,68],"help":[14],"applications":[15,38,176,205],"design":[16,151],"to":[17,66,78,83,93,130,147,224,237],"leveraging":[18,74],"the":[19,37,62,71,94,97,149,181,277],"best":[20,150],"data":[21,117,245],"and":[22,27,61,125,144,152,171,200,216,234,243,257,272,290],"work":[23],"decomposition":[24],"between":[25],"CPU":[26,45,227],"an":[28],"accelerating":[29,95],"hardware":[30],"like":[31,48,59],"GPUs.":[32,293],"However,":[33],"in":[34,96,167],"many":[35],"cases":[36],"are":[39,109,279],"being":[40,76],"converted":[41],"form":[42],"a":[43,172,186,281],"conventional":[44],"programming":[46],"language":[47],"C++,":[49,215],"or":[50],"from":[51,75],"accelerator":[52],"friendly":[53],"but":[54],"still":[55],"low":[56],"level":[57],"languages":[58],"OpenCL,":[60,212],"main":[63],"problem":[64,81],"is":[65,73,82,185,228],"determine":[67],"part":[69],"application":[72],"offloaded":[77,113,289],"GPU.":[79],"Another":[80],"estimate,":[84],"how":[85,226,285],"much":[86],"performance":[87,111,164,177,190,244],"increase":[88],"one":[89],"might":[90],"gain":[91],"due":[92],"particular":[98],"GP":[99],"GPU":[100,163,197,201,220,248,251,266],"device.":[101],"Each":[102],"platform":[103],"has":[104],"its":[105],"unique":[106],"limitations":[107],"that":[108],"affecting":[110],"computing":[114],"tasks,":[115],"e.g.":[116],"transfer":[118],"tax,":[119],"task":[120],"initialization":[121],"overhead,":[122],"memory":[123],"latency":[124],"bandwidth":[126],"limitations.":[127],"In":[128,155],"order":[129],"take":[131],"into":[132],"account":[133],"those":[134],"constraints,":[135],"software":[136],"developers":[137],"need":[138],"tooling":[139],"for":[140,189],"collecting":[141],"right":[142],"information":[143],"producing":[145],"recommendations":[146],"make":[148],"optimization":[153],"decisions.":[154],"this":[156],"presentation":[157],"we":[158],"will":[159],"introduce":[160],"two":[161],"new":[162],"analysis":[165,222],"types":[166],"Intel\u00ae":[168],"VTune\u2122":[169],"Profiler,":[170],"methodology":[173],"profiling":[178],"supported":[179],"by":[180,232],"analyses.":[182],"VTune":[183],"Profiler":[184],"well-known":[187],"tool":[188],"characterization":[191],"CPUs,":[193],"now":[194],"it":[195],"includes":[196],"Offload":[198,221],"Analysis":[199,203],"Hotspots":[202],"written":[206],"most":[208],"offloading":[209],"models":[210],"with":[211,230],"SYCL/Data":[213],"Parallel":[214],"OpenMP":[217],"Offload.":[218],"The":[219],"helps":[223],"identify":[225],"interacting":[229],"GPU(s)":[231],"creating":[233],"submitting":[235],"tasks":[236,287],"offload":[238],"queues.":[239],"It":[240],"provides":[241],"metrics":[242,278],"such":[246],"as":[247],"Utilization,":[249],"Hottest":[250],"Computing":[252],"Tasks,":[253],"Tasks":[254],"instance":[255],"count":[256],"timing,":[258],"kernel":[259],"Data":[260],"Transfer":[261],"Size,":[262],"SIMD":[263],"Width":[264],"measurements,":[265],"Execution":[267],"Units":[268],"(EU)":[269],"threads":[270],"occupancy,":[271],"Memory":[273],"Utilization.":[274],"All":[275],"together":[276],"providing":[280],"systematic":[282],"picture":[283],"effectively":[286],"were":[288],"executed":[291]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2026-02-25T08:12:03.925757","created_date":"2025-10-10T00:00:00"}
