{"id":"https://openalex.org/W3012471169","doi":"https://doi.org/10.1109/sips47522.2019.9020385","title":"Towards Algebraic Modeling of GPU Memory Access for Bank Conflict Mitigation","display_name":"Towards Algebraic Modeling of GPU Memory Access for Bank Conflict Mitigation","publication_year":2019,"publication_date":"2019-10-01","ids":{"openalex":"https://openalex.org/W3012471169","doi":"https://doi.org/10.1109/sips47522.2019.9020385","mag":"3012471169"},"language":"en","primary_location":{"id":"doi:10.1109/sips47522.2019.9020385","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sips47522.2019.9020385","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Workshop on Signal Processing Systems (SiPS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074228687","display_name":"Luca Ferranti","orcid":"https://orcid.org/0000-0001-5588-0920"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Luca Ferranti","raw_affiliation_strings":["Unit of Computing Sciences, Tampere University, Tampere, Finland"],"affiliations":[{"raw_affiliation_string":"Unit of Computing Sciences, Tampere University, Tampere, Finland","institution_ids":["https://openalex.org/I166825849"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077772076","display_name":"Jani Boutellier","orcid":"https://orcid.org/0000-0001-7606-3655"},"institutions":[{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jani Boutellier","raw_affiliation_strings":["Unit of Computing Sciences, Tampere University, Tampere, Finland"],"affiliations":[{"raw_affiliation_string":"Unit of Computing Sciences, Tampere University, Tampere, Finland","institution_ids":["https://openalex.org/I166825849"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5074228687"],"corresponding_institution_ids":["https://openalex.org/I166825849"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.22136708,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"103","last_page":"108"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8855652809143066},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6829306483268738},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.6656296253204346},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.649493932723999},{"id":"https://openalex.org/keywords/programmer","display_name":"Programmer","score":0.5629995465278625},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5479426980018616},{"id":"https://openalex.org/keywords/uniform-memory-access","display_name":"Uniform memory access","score":0.5371931195259094},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.4443587362766266},{"id":"https://openalex.org/keywords/distributed-memory","display_name":"Distributed memory","score":0.42515161633491516},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.3994992971420288},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2925441861152649},{"id":"https://openalex.org/keywords/overlay","display_name":"Overlay","score":0.26548436284065247},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18167033791542053},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.0825657844543457}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8855652809143066},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6829306483268738},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.6656296253204346},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.649493932723999},{"id":"https://openalex.org/C2778514511","wikidata":"https://www.wikidata.org/wiki/Q1374194","display_name":"Programmer","level":2,"score":0.5629995465278625},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5479426980018616},{"id":"https://openalex.org/C51290061","wikidata":"https://www.wikidata.org/wiki/Q1936765","display_name":"Uniform memory access","level":4,"score":0.5371931195259094},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.4443587362766266},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.42515161633491516},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.3994992971420288},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2925441861152649},{"id":"https://openalex.org/C136085584","wikidata":"https://www.wikidata.org/wiki/Q910289","display_name":"Overlay","level":2,"score":0.26548436284065247},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18167033791542053},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.0825657844543457}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sips47522.2019.9020385","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sips47522.2019.9020385","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Workshop on Signal Processing Systems (SiPS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.6499999761581421}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W192356505","https://openalex.org/W194896021","https://openalex.org/W1992165427","https://openalex.org/W1992851788","https://openalex.org/W2005966285","https://openalex.org/W2021908319","https://openalex.org/W2056370875","https://openalex.org/W2096488256","https://openalex.org/W2099811626","https://openalex.org/W2109222446","https://openalex.org/W2113392201","https://openalex.org/W2163687928","https://openalex.org/W2735793669","https://openalex.org/W2801496955"],"related_works":["https://openalex.org/W2026512611","https://openalex.org/W4245497162","https://openalex.org/W1985165680","https://openalex.org/W1933089384","https://openalex.org/W2604972926","https://openalex.org/W254684032","https://openalex.org/W120214571","https://openalex.org/W1848192231","https://openalex.org/W4291801887","https://openalex.org/W1824582190"],"abstract_inverted_index":{"Graphics":[0],"Processing":[1],"Units":[2],"(GPU)":[3],"have":[4,20,113],"been":[5],"widely":[6],"used":[7,169],"in":[8,16,188],"various":[9],"fields":[10],"of":[11,60,87],"scientific":[12],"computing,":[13],"such":[14],"as":[15,110],"signal":[17],"processing.":[18],"GPUs":[19],"a":[21,88,100],"hierarchical":[22],"memory":[23,26,40,56,64,96,111],"structure":[24],"with":[25,143],"layers":[27],"that":[28,84,166],"are":[29,126],"shared":[30,66,95],"between":[31],"GPU":[32,42,63,89],"processing":[33,78],"elements.":[34,79],"Partly":[35],"due":[36],"to":[37,72,114,117,121,146,157,170,178],"the":[38,61,93,133,137,186],"complex":[39],"hierarchy,":[41],"programming":[43],"is":[44,68],"non-trivial,":[45],"and":[46,135,141,161,176],"several":[47],"aspects":[48],"must":[49,139],"be":[50,115,168],"taken":[51,128],"into":[52,70,129],"account,":[53],"one":[54],"being":[55],"access":[57,76,92],"patterns.":[58],"One":[59],"fastest":[62],"layers,":[65],"memory,":[67],"grouped":[69],"banks":[71],"enable":[73],"fast,":[74],"parallel":[75],"for":[77],"Unfortunately,":[80],"it":[81],"may":[82,91],"happen":[83,175],"multiple":[85],"threads":[86],"program":[90,106,147],"same":[94],"bank":[97,101,159,173],"simultaneously":[98],"causing":[99],"conflict.":[102],"If":[103],"this":[104,150],"happens,":[105],"execution":[107],"slows":[108],"down":[109],"accesses":[112],"rescheduled":[116],"determine":[118],"which":[119],"instruction":[120],"execute":[122],"first.":[123],"Bank":[124],"conflicts":[125,160,174],"not":[127],"account":[130],"automatically":[131],"by":[132],"compiler,":[134],"hence":[136],"programmer":[138],"detect":[140,158],"deal":[142],"them":[144],"prior":[145],"execution.":[148],"In":[149],"paper,":[151],"we":[152],"present":[153],"an":[154],"algebraic":[155],"approach":[156],"prove":[162],"some":[163],"theoretical":[164],"results":[165,184],"can":[167],"predict":[171],"when":[172],"how":[177],"avoid":[179],"them.":[180],"Also,":[181],"our":[182],"experimental":[183],"illustrate":[185],"savings":[187],"computation":[189],"time.":[190]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
