{"id":"https://openalex.org/W2913561863","doi":"https://doi.org/10.1109/tc.2019.2895036","title":"Intra-Cluster Coalescing and Distributed-Block Scheduling to Reduce GPU NoC Pressure","display_name":"Intra-Cluster Coalescing and Distributed-Block Scheduling to Reduce GPU NoC Pressure","publication_year":2019,"publication_date":"2019-01-24","ids":{"openalex":"https://openalex.org/W2913561863","doi":"https://doi.org/10.1109/tc.2019.2895036","mag":"2913561863"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2019.2895036","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2019.2895036","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://biblio.ugent.be/publication/8616587/file/8616592.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100364374","display_name":"Lu Wang","orcid":"https://orcid.org/0000-0001-5759-6544"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Lu Wang","raw_affiliation_strings":["Department of Electronics and Information Systems (ELIS), Ghent University, Gent, Belgium"],"affiliations":[{"raw_affiliation_string":"Department of Electronics and Information Systems (ELIS), Ghent University, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072444630","display_name":"Xia Zhao","orcid":"https://orcid.org/0000-0001-6479-9200"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Xia Zhao","raw_affiliation_strings":["Department of Electronics and Information Systems (ELIS), Ghent University, Gent, Belgium"],"affiliations":[{"raw_affiliation_string":"Department of Electronics and Information Systems (ELIS), Ghent University, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061128237","display_name":"David Kaeli","orcid":"https://orcid.org/0000-0002-5692-0151"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Kaeli","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047295889","display_name":"Zhiying Wang","orcid":"https://orcid.org/0000-0003-3339-3085"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiying Wang","raw_affiliation_strings":["School of Computer, National University of Defense Technology, Changsha, Hunan, P.R. China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, Changsha, Hunan, P.R. China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033119975","display_name":"Lieven Eeckhout","orcid":"https://orcid.org/0000-0001-8792-4473"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Lieven Eeckhout","raw_affiliation_strings":["Department of Electronics and Information Systems (ELIS), Ghent University, Gent, Belgium"],"affiliations":[{"raw_affiliation_string":"Department of Electronics and Information Systems (ELIS), Ghent University, Gent, Belgium","institution_ids":["https://openalex.org/I32597200"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100364374"],"corresponding_institution_ids":["https://openalex.org/I32597200"],"apc_list":null,"apc_paid":null,"fwci":1.2038,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.76154448,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":"68","issue":"7","first_page":"1064","last_page":"1076"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8506288528442383},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.613174557685852},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5878356695175171},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5535905957221985},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5071713924407959},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.48451536893844604},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4640433192253113},{"id":"https://openalex.org/keywords/network-congestion","display_name":"Network congestion","score":0.44767463207244873},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.4397128224372864},{"id":"https://openalex.org/keywords/cache-coherence","display_name":"Cache coherence","score":0.41750848293304443},{"id":"https://openalex.org/keywords/network-packet","display_name":"Network packet","score":0.3940221965312958},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.34076064825057983},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2597266435623169},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.25479406118392944},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.1543152630329132}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8506288528442383},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.613174557685852},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5878356695175171},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5535905957221985},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5071713924407959},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.48451536893844604},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4640433192253113},{"id":"https://openalex.org/C195563490","wikidata":"https://www.wikidata.org/wiki/Q180368","display_name":"Network congestion","level":3,"score":0.44767463207244873},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.4397128224372864},{"id":"https://openalex.org/C141917322","wikidata":"https://www.wikidata.org/wiki/Q1025017","display_name":"Cache coherence","level":5,"score":0.41750848293304443},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.3940221965312958},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.34076064825057983},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2597266435623169},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.25479406118392944},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.1543152630329132},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tc.2019.2895036","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2019.2895036","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},{"id":"pmh:oai:archive.ugent.be:8625885","is_oa":false,"landing_page_url":"https://biblio.ugent.be/publication/8625885","pdf_url":null,"source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journalArticle"},{"id":"pmh:oai:archive.ugent.be:8616587","is_oa":true,"landing_page_url":"http://hdl.handle.net/1854/LU-8616587","pdf_url":"https://biblio.ugent.be/publication/8616587/file/8616592.pdf","source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISSN: 1557-9956","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:archive.ugent.be:8616587","is_oa":true,"landing_page_url":"http://hdl.handle.net/1854/LU-8616587","pdf_url":"https://biblio.ugent.be/publication/8616587/file/8616592.pdf","source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISSN: 1557-9956","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"score":0.9100000262260437,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G102905528","display_name":null,"funder_award_id":"61572508","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2187338242","display_name":null,"funder_award_id":"61672526","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3782881470","display_name":null,"funder_award_id":"741097","funder_id":"https://openalex.org/F4320338335","funder_display_name":"H2020 European Research Council"},{"id":"https://openalex.org/G5765668298","display_name":null,"funder_award_id":"G.0144.17N","funder_id":"https://openalex.org/F4320321730","funder_display_name":"Fonds Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G6116909505","display_name":null,"funder_award_id":"ZK17-03-06","funder_id":"https://openalex.org/F4320324150","funder_display_name":"National University of Defense Technology"},{"id":"https://openalex.org/G8226679159","display_name":null,"funder_award_id":"G.0434.16N","funder_id":"https://openalex.org/F4320321730","funder_display_name":"Fonds Wetenschappelijk Onderzoek"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321730","display_name":"Fonds Wetenschappelijk Onderzoek","ror":"https://ror.org/03qtxy027"},{"id":"https://openalex.org/F4320324150","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11"},{"id":"https://openalex.org/F4320338335","display_name":"H2020 European Research Council","ror":"https://ror.org/0472cxd90"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2913561863.pdf","grobid_xml":"https://content.openalex.org/works/W2913561863.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W1979527452","https://openalex.org/W1982643312","https://openalex.org/W2018337509","https://openalex.org/W2028173051","https://openalex.org/W2047060659","https://openalex.org/W2048441570","https://openalex.org/W2066985990","https://openalex.org/W2079038734","https://openalex.org/W2080592089","https://openalex.org/W2092283255","https://openalex.org/W2093043622","https://openalex.org/W2100926301","https://openalex.org/W2128120785","https://openalex.org/W2130820665","https://openalex.org/W2163820265","https://openalex.org/W2169275978","https://openalex.org/W2235724720","https://openalex.org/W2441593292","https://openalex.org/W2464177207","https://openalex.org/W2521924282","https://openalex.org/W2551185646","https://openalex.org/W2605251767","https://openalex.org/W2605751925","https://openalex.org/W2619002702","https://openalex.org/W2625200202","https://openalex.org/W2735793669","https://openalex.org/W6683865707","https://openalex.org/W6719768283"],"related_works":["https://openalex.org/W1657880117","https://openalex.org/W2595172197","https://openalex.org/W2127970246","https://openalex.org/W2084856301","https://openalex.org/W1001352512","https://openalex.org/W4382618745","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W1987128138","https://openalex.org/W2748922771"],"abstract_inverted_index":{"GPUs":[0,38,57],"continue":[1],"to":[2,10,27,46,76,101,118,124,144,187,203,219,223],"boost":[3],"the":[4,25,28,103,122,140,192,208],"number":[5],"of":[6,52,121,182],"streaming":[7],"multiprocessors":[8],"(SMs)":[9],"provide":[11],"increasingly":[12],"higher":[13],"compute":[14],"capabilities.":[15],"To":[16,127],"construct":[17],"a":[18,31,48,64,93,154],"scalable":[19],"crossbar":[20],"network-on-chip":[21],"(NoC)":[22],"that":[23,85,161],"connects":[24],"SMs":[26,42,91,152],"memory":[29,105],"controllers,":[30],"cluster":[32,94],"structure":[33],"is":[34],"introduced":[35],"in":[36,39,86,92,153],"modern":[37],"which":[40,62],"several":[41],"are":[43,167],"grouped":[44],"together":[45],"share":[47],"network":[49,53,74],"port.":[50],"Because":[51],"port":[54],"sharing,":[55],"clustered":[56],"face":[58],"severe":[59],"NoC":[60,79,109,130],"congestion,":[61],"creates":[63],"critical":[65],"performance":[66,172,180],"bottleneck.":[67],"In":[68,81],"this":[69],"paper,":[70],"we":[71,83,132],"target":[72],"redundant":[73,99,129],"traffic":[75],"mitigate":[77],"GPU":[78],"congestion.":[80],"particular,":[82],"observe":[84],"many":[87],"GPU-compute":[88],"applications,":[89],"different":[90],"access":[95,102],"shared":[96],"data.":[97],"Sending":[98],"requests":[100,123],"same":[104,193],"location":[106],"wastes":[107],"valuable":[108],"bandwidth-we":[110],"find":[111],"on":[112,215],"average":[113,179,216],"19":[114,204,213],"percent":[115,184,200,214],"(and":[116,185,201,217],"up":[117,186,202,218],"48":[119],"percent)":[120,189,205],"be":[125],"redundant.":[126],"remove":[128],"traffic,":[131],"propose":[133],"distributed-block":[134,162],"scheduling,":[135,163],"intra-cluster":[136],"coalescing":[137],"(ICC)":[138],"and":[139,150,165,169,173,206],"coalesced":[141],"cache":[142,147],"(CC)":[143],"coalesce":[145],"L1":[146],"misses":[148],"within":[149],"across":[151],"cluster,":[155],"respectively.":[156],"Our":[157],"evaluation":[158],"results":[159],"show":[160],"ICC":[164],"CC":[166],"complementary":[168],"improve":[170],"both":[171],"energy":[174,197],"consumption.":[175],"We":[176],"report":[177],"an":[178],"improvement":[181],"15":[183],"67":[188],"while":[190],"at":[191],"time":[194],"reducing":[195],"system":[196],"by":[198,212],"6":[199],"improving":[207],"energy-delay":[209],"product":[210],"(EDP)":[211],"53":[220],"percent),":[221],"compared":[222],"state-of-the-art":[224],"distributed":[225],"CTA":[226],"scheduling.":[227]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
