{"id":"https://openalex.org/W2520618391","doi":"https://doi.org/10.1109/hpcsim.2016.7568449","title":"Analysis of thread workgroup broadcast for Intel GPUs","display_name":"Analysis of thread workgroup broadcast for Intel GPUs","publication_year":2016,"publication_date":"2016-07-01","ids":{"openalex":"https://openalex.org/W2520618391","doi":"https://doi.org/10.1109/hpcsim.2016.7568449","mag":"2520618391"},"language":"en","primary_location":{"id":"doi:10.1109/hpcsim.2016.7568449","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcsim.2016.7568449","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091651538","display_name":"Grigore Lupescu","orcid":null},"institutions":[{"id":"https://openalex.org/I61641377","display_name":"Universitatea Na\u021bional\u0103 de \u0218tiin\u021b\u0103 \u0219i Tehnologie Politehnica Bucure\u0219ti","ror":"https://ror.org/0558j5q12","country_code":"RO","type":"education","lineage":["https://openalex.org/I61641377"]}],"countries":["RO"],"is_corresponding":true,"raw_author_name":"Grigore Lupescu","raw_affiliation_strings":["Computer Science, University Politehnica of Bucharest, Bucharest, Romania"],"affiliations":[{"raw_affiliation_string":"Computer Science, University Politehnica of Bucharest, Bucharest, Romania","institution_ids":["https://openalex.org/I61641377"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034254754","display_name":"Emil Slu\u015fanschi","orcid":"https://orcid.org/0000-0003-0222-1002"},"institutions":[{"id":"https://openalex.org/I61641377","display_name":"Universitatea Na\u021bional\u0103 de \u0218tiin\u021b\u0103 \u0219i Tehnologie Politehnica Bucure\u0219ti","ror":"https://ror.org/0558j5q12","country_code":"RO","type":"education","lineage":["https://openalex.org/I61641377"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Emil-Ioan Slusanschi","raw_affiliation_strings":["Computer Science, University Politehnica of Bucharest, Bucharest, Romania"],"affiliations":[{"raw_affiliation_string":"Computer Science, University Politehnica of Bucharest, Bucharest, Romania","institution_ids":["https://openalex.org/I61641377"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013822778","display_name":"Nicolae \u0162\u00e3pu\u015f","orcid":"https://orcid.org/0000-0002-7878-6598"},"institutions":[{"id":"https://openalex.org/I61641377","display_name":"Universitatea Na\u021bional\u0103 de \u0218tiin\u021b\u0103 \u0219i Tehnologie Politehnica Bucure\u0219ti","ror":"https://ror.org/0558j5q12","country_code":"RO","type":"education","lineage":["https://openalex.org/I61641377"]}],"countries":["RO"],"is_corresponding":false,"raw_author_name":"Nicolae Tapus","raw_affiliation_strings":["Computer Science, University Politehnica of Bucharest, Bucharest, Romania"],"affiliations":[{"raw_affiliation_string":"Computer Science, University Politehnica of Bucharest, Bucharest, Romania","institution_ids":["https://openalex.org/I61641377"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5091651538"],"corresponding_institution_ids":["https://openalex.org/I61641377"],"apc_list":null,"apc_paid":null,"fwci":0.3153,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.57974422,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1019","last_page":"1024"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8933364152908325},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.829343318939209},{"id":"https://openalex.org/keywords/workgroup","display_name":"Workgroup","score":0.6728252172470093},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5900130271911621},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5673772096633911},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.5369045734405518},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.5205067992210388},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.45151495933532715},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.4400525689125061},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.43516650795936584},{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.4325879216194153},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.42497751116752625},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.36404597759246826}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8933364152908325},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.829343318939209},{"id":"https://openalex.org/C95423123","wikidata":"https://www.wikidata.org/wiki/Q622178","display_name":"Workgroup","level":2,"score":0.6728252172470093},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5900130271911621},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5673772096633911},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.5369045734405518},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.5205067992210388},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.45151495933532715},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.4400525689125061},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.43516650795936584},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.4325879216194153},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.42497751116752625},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.36404597759246826}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpcsim.2016.7568449","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcsim.2016.7568449","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 International Conference on High Performance Computing &amp; Simulation (HPCS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W579519726","https://openalex.org/W1717365219","https://openalex.org/W6616851794","https://openalex.org/W6618672500"],"related_works":["https://openalex.org/W2092304576","https://openalex.org/W2057234250","https://openalex.org/W2544369712","https://openalex.org/W2090939166","https://openalex.org/W1972265022","https://openalex.org/W3022562634","https://openalex.org/W4252926748","https://openalex.org/W2439357447","https://openalex.org/W2103261828","https://openalex.org/W4225987401"],"abstract_inverted_index":{"As":[0],"hardware":[1,13],"becomes":[2],"more":[3],"flexible":[4],"in":[5,15,26,44],"terms":[6],"of":[7,57],"programming,":[8],"software":[9],"APIs":[10],"must":[11],"expose":[12],"features":[14],"a":[16],"portable":[17],"way.":[18],"Thread":[19],"to":[20,84,98],"thread":[21,83,85],"communication":[22],"is":[23],"being":[24],"exposed":[25],"OpenCL":[27,46,65],"2.0":[28],"through":[29],"the":[30,40,45,55,63,72,90,95,101],"newly":[31],"defined":[32],"work-group":[33,41,73],"functions.":[34],"In":[35],"this":[36],"paper":[37],"we":[38,70,88],"analyze":[39,89],"broadcast":[42,74],"functionality":[43],"compiler":[47],"backend":[48],"for":[49,82],"Intel's":[50,58],"GPUs.":[51],"We":[52],"first":[53],"describe":[54,71],"particularities":[56],"GEN":[59],"GPU":[60],"architecture":[61],"and":[62,92],"Beignet":[64],"open":[66],"source":[67],"project.":[68],"Then":[69],"implementation":[75,96],"which":[76],"uses":[77],"shared":[78],"local":[79],"memory":[80],"read/write":[81],"communication.":[86],"Finally":[87],"performance":[91],"on":[93],"how":[94],"maps":[97],"hardware,":[99],"motivating":[100],"design":[102],"decisions.":[103]},"counts_by_year":[{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
