{"id":"https://openalex.org/W4362505502","doi":"https://doi.org/10.1145/3578245.3583716","title":"Core-Level Performance Engineering with the Open-Source Architecture Code Analyzer (OSACA) and the Compiler Explorer","display_name":"Core-Level Performance Engineering with the Open-Source Architecture Code Analyzer (OSACA) and the Compiler Explorer","publication_year":2023,"publication_date":"2023-04-03","ids":{"openalex":"https://openalex.org/W4362505502","doi":"https://doi.org/10.1145/3578245.3583716"},"language":"en","primary_location":{"id":"doi:10.1145/3578245.3583716","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1145/3578245.3583716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the 2023 ACM/SPEC International Conference on Performance Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031307529","display_name":"Jan Laukemann","orcid":"https://orcid.org/0000-0002-3776-9353"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Jan Laukemann","raw_affiliation_strings":["Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082552227","display_name":"Georg Hager","orcid":"https://orcid.org/0000-0002-8723-2781"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Georg Hager","raw_affiliation_strings":["Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany"],"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg, Erlangen, Germany","institution_ids":["https://openalex.org/I181369854"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5031307529"],"corresponding_institution_ids":["https://openalex.org/I181369854"],"apc_list":null,"apc_paid":null,"fwci":0.3091,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.45084799,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"127","last_page":"131"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8750993013381958},{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.7401110529899597},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6605550050735474},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.5251823663711548},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5174815654754639},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5042034387588501},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.49935126304626465},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4957505166530609},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.424851655960083},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.4211026132106781},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.41022390127182007},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4049784541130066},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.38569456338882446},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.38518470525741577},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.37052834033966064},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3140382170677185},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.207280695438385}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8750993013381958},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.7401110529899597},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6605550050735474},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.5251823663711548},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5174815654754639},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5042034387588501},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.49935126304626465},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4957505166530609},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.424851655960083},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.4211026132106781},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.41022390127182007},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4049784541130066},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.38569456338882446},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.38518470525741577},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.37052834033966064},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3140382170677185},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.207280695438385},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3578245.3583716","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1145/3578245.3583716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the 2023 ACM/SPEC International Conference on Performance Engineering","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1517652255","https://openalex.org/W2002555321","https://openalex.org/W2101511474","https://openalex.org/W2891241286","https://openalex.org/W2897675008","https://openalex.org/W3016197610","https://openalex.org/W3089579782","https://openalex.org/W3103041597","https://openalex.org/W3104900731","https://openalex.org/W3122403434","https://openalex.org/W4211158017","https://openalex.org/W4282970171"],"related_works":["https://openalex.org/W2129537883","https://openalex.org/W2566637483","https://openalex.org/W2127324789","https://openalex.org/W3024308452","https://openalex.org/W4244894488","https://openalex.org/W4285390450","https://openalex.org/W4225987401","https://openalex.org/W4236526691","https://openalex.org/W2003690377","https://openalex.org/W2079351402"],"abstract_inverted_index":{"While":[0],"many":[1],"developers":[2],"put":[3],"a":[4,56,70,147],"lot":[5],"of":[6,17,59,69,150],"effort":[7],"into":[8,110],"optimizing":[9],"large-scale":[10],"parallelism,":[11],"they":[12],"often":[13],"neglect":[14],"the":[15,33,51,60,67,75,138,151],"importance":[16],"an":[18],"efficient":[19],"serial":[20,25],"code.":[21],"Even":[22],"worse,":[23],"slow":[24],"code":[26],"tends":[27],"to":[28,54,160,178],"scale":[29],"very":[30],"well,":[31],"hiding":[32],"fact":[34],"that":[35],"resources":[36],"are":[37],"wasted":[38],"because":[39],"no":[40],"definite":[41],"hardware":[42,65],"performance":[43,92,132,135,170,183],"limit":[44],"(\"bottleneck\")":[45],"is":[46],"exhausted.":[47],"This":[48],"tutorial":[49],"conveys":[50],"required":[52],"knowledge":[53],"develop":[55],"thorough":[57],"understanding":[58],"interactions":[61],"between":[62],"software":[63],"and":[64,74,89,100,114,126,134,165,167],"on":[66],"level":[68,79],"single":[71],"CPU":[72],"core":[73,87],"lowest":[76],"memory":[77],"hierarchy":[78],"(the":[80],"L1":[81],"cache).":[82],"We":[83,107,129],"introduce":[84],"general":[85],"out-of-order":[86],"architectures":[88],"their":[90,162],"typical":[91],"bottlenecks":[93],"using":[94,137],"modern":[95],"x86-64":[96],"(Intel":[97],"Ice":[98],"Lake)":[99],"ARM":[101],"(Fujitsu":[102],"A64FX)":[103],"processors":[104],"as":[105],"examples.":[106],"then":[108],"go":[109],"detail":[111],"about":[112],"x86":[113],"AArch64":[115],"assembly":[116],"code,":[117],"specifically":[118],"including":[119],"vectorization":[120],"(SIMD),":[121],"pipeline":[122],"utilization,":[123],"critical":[124],"paths,":[125],"loop-carried":[127],"dependencies.":[128],"also":[130],"demonstrate":[131],"analysis":[133],"engineering":[136,184],"Open-Source":[139],"Architecture":[140],"Code":[141],"Analyzer":[142],"(OSACA)":[143],"in":[144],"combination":[145],"with":[146],"dedicated":[148],"instance":[149],"well-known":[152],"Compiler":[153],"Explorer.":[154],"Various":[155],"hands-on":[156],"exercises":[157],"allow":[158],"attendees":[159],"make":[161],"own":[163],"experiments":[164],"measurements":[166],"identify":[168],"in-core":[169,182],"bottlenecks.":[171],"Furthermore,":[172],"we":[173],"show":[174],"real-life":[175],"use":[176],"cases":[177],"emphasize":[179],"how":[180],"profitable":[181],"can":[185],"be.":[186]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
