{"id":"https://openalex.org/W2595524172","doi":"https://doi.org/10.1080/17445760.2017.1296147","title":"Supernode transformation on GPGPUs","display_name":"Supernode transformation on GPGPUs","publication_year":2017,"publication_date":"2017-03-16","ids":{"openalex":"https://openalex.org/W2595524172","doi":"https://doi.org/10.1080/17445760.2017.1296147","mag":"2595524172"},"language":"en","primary_location":{"id":"doi:10.1080/17445760.2017.1296147","is_oa":false,"landing_page_url":"https://doi.org/10.1080/17445760.2017.1296147","pdf_url":null,"source":{"id":"https://openalex.org/S85375271","display_name":"International Journal of Parallel Emergent and Distributed Systems","issn_l":"1744-5760","issn":["1744-5760","1744-5779"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Parallel, Emergent and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077521546","display_name":"Yong Chen","orcid":"https://orcid.org/0000-0002-9961-9051"},"institutions":[{"id":"https://openalex.org/I16269868","display_name":"Santa Clara University","ror":"https://ror.org/03ypqe447","country_code":"US","type":"education","lineage":["https://openalex.org/I16269868"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yong Chen","raw_affiliation_strings":["Department of Computer Engineering, Santa Clara University , Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Santa Clara University , Santa Clara, CA, USA","institution_ids":["https://openalex.org/I16269868"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111910418","display_name":"Weijia Shang","orcid":null},"institutions":[{"id":"https://openalex.org/I16269868","display_name":"Santa Clara University","ror":"https://ror.org/03ypqe447","country_code":"US","type":"education","lineage":["https://openalex.org/I16269868"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weijia Shang","raw_affiliation_strings":["Department of Computer Engineering, Santa Clara University , Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Santa Clara University , Santa Clara, CA, USA","institution_ids":["https://openalex.org/I16269868"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5077521546"],"corresponding_institution_ids":["https://openalex.org/I16269868"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02476485,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"34","issue":"2","first_page":"181","last_page":"202"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8160461783409119},{"id":"https://openalex.org/keywords/nested-loop-join","display_name":"Nested loop join","score":0.5719561576843262},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5645357370376587},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5349992513656616},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5004837512969971},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.48194169998168945},{"id":"https://openalex.org/keywords/loop-tiling","display_name":"Loop tiling","score":0.42938053607940674},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3850927948951721},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.14927250146865845},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.1149565577507019}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8160461783409119},{"id":"https://openalex.org/C1306188","wikidata":"https://www.wikidata.org/wiki/Q4060687","display_name":"Nested loop join","level":2,"score":0.5719561576843262},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5645357370376587},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5349992513656616},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5004837512969971},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.48194169998168945},{"id":"https://openalex.org/C11799548","wikidata":"https://www.wikidata.org/wiki/Q6675847","display_name":"Loop tiling","level":3,"score":0.42938053607940674},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3850927948951721},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.14927250146865845},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.1149565577507019},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1080/17445760.2017.1296147","is_oa":false,"landing_page_url":"https://doi.org/10.1080/17445760.2017.1296147","pdf_url":null,"source":{"id":"https://openalex.org/S85375271","display_name":"International Journal of Parallel Emergent and Distributed Systems","issn_l":"1744-5760","issn":["1744-5760","1744-5779"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Parallel, Emergent and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W190620829","https://openalex.org/W1474358485","https://openalex.org/W1964354616","https://openalex.org/W1970141743","https://openalex.org/W2034761517","https://openalex.org/W2071110673","https://openalex.org/W2076848169","https://openalex.org/W2082352135","https://openalex.org/W2090011371","https://openalex.org/W2113737216","https://openalex.org/W2116896429","https://openalex.org/W2122804176","https://openalex.org/W2139177146","https://openalex.org/W2139908937","https://openalex.org/W2143329533","https://openalex.org/W2163193850","https://openalex.org/W2163751474","https://openalex.org/W2165654401","https://openalex.org/W2212640437","https://openalex.org/W2296218291","https://openalex.org/W2560137734","https://openalex.org/W2752885492","https://openalex.org/W2946046356","https://openalex.org/W4248073216","https://openalex.org/W4300100382","https://openalex.org/W6680750299"],"related_works":["https://openalex.org/W2112391112","https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W4254171597","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2370314112"],"abstract_inverted_index":{"Supernode":[0],"transformation,":[1],"or":[2,215,224],"tiling,":[3],"is":[4,102,116,160,220],"a":[5,119,188,213,216,258],"technique":[6],"that":[7],"partitions":[8],"algorithms":[9,87],"to":[10,16,30,33,68,249,279],"improve":[11],"data":[12,95,134],"locality":[13],"and":[14,48,71,132,152,169,180],"parallelism":[15],"achieve":[17],"shortest":[18,82],"running":[19,84,114],"time.":[20,85,237,283],"It":[21,226],"groups":[22],"multiple":[23],"iterations":[24],"of":[25,74,121,139,149,195,209,263],"nested":[26,91],"loops":[27,92],"into":[28,207],"supernodes":[29,239],"be":[31,43,205,245,250],"assigned":[32],"processors":[34],"for":[35,79,111],"processing":[36],"in":[37,187,201,271],"parallel.":[38],"A":[39,107],"supernode":[40,46,54,64,66,77,123,158,172,222],"transformation":[41,55,223],"can":[42,204,277],"described":[44],"by":[45,183,257],"size":[47,131,159,173,262,276],"shape.":[49],"This":[50,218],"paper":[51],"focuses":[52],"on":[53,56,185,240,247],"General":[57],"Purpose":[58],"Graphic":[59],"Processing":[60],"Units":[61],"(GPGPUs),":[62],"including":[63],"scheduling,":[65],"mapping":[67],"GPGPU":[69,150],"blocks,":[70,151],"the":[72,75,81,112,122,129,133,136,147,153,170,199,202,228,234,241,253,264],"finding":[73],"optimal":[76,157,171,275],"size,":[78,124],"achieving":[80],"total":[83,113,235,281],"The":[86,97,156,167,238,261,274],"considered":[88],"are":[89,181],"two":[90],"with":[93],"regular":[94],"dependencies.":[96],"Longest":[98],"Common":[99],"Subsequence":[100],"problem":[101,130],"used":[103],"as":[104,118,128,146,198,212],"an":[105,268],"illustration.":[106],"novel":[108],"mathematical":[109],"model":[110,168],"time":[115,138],"established":[117],"function":[120],"algorithm":[125,192],"parameters":[126,144],"such":[127,145],"dependence,":[135],"computation":[137],"each":[140,256],"loop":[141],"iteration,":[142],"architecture":[143],"number":[148],"communication":[154,230],"cost.":[155],"derived":[161],"from":[162],"this":[163,272],"closed":[164],"form":[165],"model.":[166],"provide":[174],"better":[175],"results":[176],"than":[177],"previous":[178],"research":[179],"verified":[182],"simulations":[184],"GPGPUs.Iterations":[186],"two-dimensional":[189],"uniform":[190],"dependence":[191],"iteration":[193],"space":[194],"M\u00d7N,":[196],"shown":[197],"intersections":[200],"picture,":[203],"grouped":[206],"rectangles":[208],"w\u00d7h":[210],"known":[211],"tile":[214],"supernode.":[217],"process":[219],"called":[221],"tiling.":[225],"reduces":[227],"inter-iteration":[229],"cost":[231],"thus":[232],"improves":[233],"execution":[236,282],"same":[242,254],"wavefront":[243],"may":[244],"scheduled":[246],"GPU":[248,259],"processed":[251],"at":[252],"time,":[255],"block.":[260],"tile,":[265],"w\u00d7h,":[266],"plays":[267],"important":[269],"role":[270],"transformation.":[273],"lead":[278],"minimal":[280]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
