{"id":"https://openalex.org/W3009601136","doi":"https://doi.org/10.1109/tpds.2020.2978045","title":"Optimizing Streaming Parallelism on Heterogeneous Many-Core Architectures","display_name":"Optimizing Streaming Parallelism on Heterogeneous Many-Core Architectures","publication_year":2020,"publication_date":"2020-03-03","ids":{"openalex":"https://openalex.org/W3009601136","doi":"https://doi.org/10.1109/tpds.2020.2978045","mag":"3009601136"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2020.2978045","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2020.2978045","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2003.04294","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Peng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Peng Zhang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jianbin Fang","orcid":"https://orcid.org/0000-0003-3542-4869"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianbin Fang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Canqun Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Canqun Yang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chun Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chun Huang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tao Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Tang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":null,"display_name":"Zheng Wang","orcid":"https://orcid.org/0000-0001-6157-0662"},"institutions":[{"id":"https://openalex.org/I130828816","display_name":"University of Leeds","ror":"https://ror.org/024mrxd33","country_code":"GB","type":"education","lineage":["https://openalex.org/I130828816"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zheng Wang","raw_affiliation_strings":["University of Leeds, Leeds, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Leeds, Leeds, United Kingdom","institution_ids":["https://openalex.org/I130828816"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":2.3421,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.88170974,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"31","issue":"8","first_page":"1878","last_page":"1896"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.7957000136375427,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.7957000136375427,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.054099999368190765,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.029600000008940697,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.8651000261306763},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.6328999996185303},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5996000170707703},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5174999833106995},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.5134000182151794},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4844000041484833},{"id":"https://openalex.org/keywords/shared-resource","display_name":"Shared resource","score":0.43630000948905945},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.4075999855995178},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.3919000029563904}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9088000059127808},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.8651000261306763},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.6328999996185303},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5996000170707703},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5842000246047974},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5174999833106995},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.5134000182151794},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4844000041484833},{"id":"https://openalex.org/C51332947","wikidata":"https://www.wikidata.org/wiki/Q1172305","display_name":"Shared resource","level":2,"score":0.43630000948905945},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.4075999855995178},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4000999927520752},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3919000029563904},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.3718999922275543},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.3427000045776367},{"id":"https://openalex.org/C158207573","wikidata":"https://www.wikidata.org/wiki/Q5747224","display_name":"Heterogeneous network","level":4,"score":0.3425999879837036},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.33230000734329224},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.31790000200271606},{"id":"https://openalex.org/C19275194","wikidata":"https://www.wikidata.org/wiki/Q222903","display_name":"Multiplexing","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.2913999855518341},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2888999879360199},{"id":"https://openalex.org/C107027933","wikidata":"https://www.wikidata.org/wiki/Q2006448","display_name":"Stream processing","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C48903430","wikidata":"https://www.wikidata.org/wiki/Q491370","display_name":"Graph partition","level":3,"score":0.2858000099658966},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.28049999475479126},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.2775999903678894},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.26899999380111694},{"id":"https://openalex.org/C184596265","wikidata":"https://www.wikidata.org/wiki/Q2651576","display_name":"Model of computation","level":3,"score":0.2655999958515167},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.2639999985694885},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpds.2020.2978045","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2020.2978045","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2003.04294","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2003.04294","pdf_url":"https://arxiv.org/pdf/2003.04294","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2003.04294","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2003.04294","pdf_url":"https://arxiv.org/pdf/2003.04294","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W91689834","https://openalex.org/W1194153933","https://openalex.org/W1552624537","https://openalex.org/W1595275604","https://openalex.org/W1864199185","https://openalex.org/W1965942711","https://openalex.org/W1968197401","https://openalex.org/W1982020565","https://openalex.org/W1984296775","https://openalex.org/W1987564528","https://openalex.org/W1999329795","https://openalex.org/W2025437939","https://openalex.org/W2033139628","https://openalex.org/W2055312318","https://openalex.org/W2070897293","https://openalex.org/W2071128523","https://openalex.org/W2099680095","https://openalex.org/W2100218206","https://openalex.org/W2107483876","https://openalex.org/W2109608899","https://openalex.org/W2122078011","https://openalex.org/W2126026097","https://openalex.org/W2150476673","https://openalex.org/W2160241055","https://openalex.org/W2321173057","https://openalex.org/W2329650361","https://openalex.org/W2341343020","https://openalex.org/W2342906597","https://openalex.org/W2346144740","https://openalex.org/W2368012502","https://openalex.org/W2508492666","https://openalex.org/W2510484619","https://openalex.org/W2514474307","https://openalex.org/W2561782554","https://openalex.org/W2566887379","https://openalex.org/W2611626712","https://openalex.org/W2618440624","https://openalex.org/W2618564128","https://openalex.org/W2729162096","https://openalex.org/W2753445897","https://openalex.org/W2758013740","https://openalex.org/W2762847354","https://openalex.org/W2768065515","https://openalex.org/W2774383127","https://openalex.org/W2784409032","https://openalex.org/W2885901248","https://openalex.org/W2887967112","https://openalex.org/W2889063706","https://openalex.org/W2889543163","https://openalex.org/W2889745160","https://openalex.org/W2893742517","https://openalex.org/W2902340222","https://openalex.org/W2962724414","https://openalex.org/W2964301839","https://openalex.org/W2969338443","https://openalex.org/W3004176791","https://openalex.org/W3013976982","https://openalex.org/W3152199537","https://openalex.org/W4229790860","https://openalex.org/W4243205343","https://openalex.org/W4246166885","https://openalex.org/W4250981202","https://openalex.org/W6629867571","https://openalex.org/W6675354045"],"related_works":[],"abstract_inverted_index":{"As":[0],"many-core":[1,133,244],"accelerators":[2],"keep":[3],"integrating":[4],"more":[5,11],"processing":[6,42],"units,":[7],"it":[8,239],"becomes":[9],"increasingly":[10],"difficult":[12],"for":[13,120,127,169],"a":[14,51,93,117,138,151,164,170,204,246,250,263,288],"parallel":[15,129,235],"application":[16,149],"to":[17,33,74,114,141,166,194,218,232,254,279],"make":[18],"effective":[19,26,58],"use":[20],"of":[21,28,39,71,96,146,176,222,283],"all":[22],"available":[23],"resources.":[24],"An":[25],"way":[27],"improving":[29],"hardware":[30,64,121,187],"utilization":[31],"is":[32,87,92,161],"exploit":[34],"spatial":[35],"and":[36,47,67,84,99,106,124,155,237,249,265,271],"temporal":[37],"sharing":[38],"the":[40,69,75,80,100,143,147,220,255,269,272,284],"heterogeneous":[41,55,59,132,243],"units":[43],"by":[44,201,287],"multiplexing":[45],"computation":[46],"communication":[48],"tasks":[49],"-":[50],"strategy":[52],"known":[53],"as":[54,163],"streaming.":[56],"Achieving":[57],"streaming":[60],"requires":[61,182],"carefully":[62],"partitioning":[63,83],"among":[65],"tasks,":[66],"matching":[68],"granularity":[70,86,126,157],"task":[72,85,125,156],"parallelism":[73],"resource":[76,82,122,153],"partition.":[77],"However,":[78],"finding":[79],"right":[81],"extremely":[88],"challenging,":[89],"because":[90],"there":[91],"large":[94],"number":[95],"possible":[97],"solutions":[98],"optimal":[101],"solution":[102,119],"varies":[103],"across":[104],"programs":[105],"datasets.":[107],"This":[108],"article":[109],"presents":[110],"an":[111,178],"automatic":[112],"approach":[113,136,231,259],"quickly":[115,167],"derive":[116],"good":[118,171],"partition":[123,154],"task-based":[128],"applications":[130,236],"on":[131,240,261,268],"architectures.":[134],"Our":[135],"employs":[137],"performance":[139,145,221,285],"model":[140,160,180,206,213],"estimate":[142],"resulting":[144],"target":[148],"under":[150],"given":[152],"configuration.":[158],"The":[159,211],"used":[162,217],"utility":[165],"search":[168],"configuration":[172],"at":[173,226],"runtime.":[174,227],"Instead":[175],"hand-crafting":[177],"analytical":[179],"that":[181],"expert":[183],"insights":[184],"into":[185],"low-level":[186],"details,":[188],"we":[189],"employ":[190],"machine":[191],"learning":[192,203],"techniques":[193],"automatically":[195],"learn":[196],"it.":[197],"We":[198,228],"achieve":[199],"this":[200],"first":[202],"predictive":[205],"offline":[207],"using":[208],"training":[209],"programs.":[210],"learned":[212],"can":[214],"then":[215],"be":[216],"predict":[219],"any":[223],"unseen":[224],"program":[225],"apply":[229],"our":[230,258],"39":[233],"representative":[234,242],"evaluate":[238],"two":[241],"platforms:":[245],"CPU-XeonPhi":[247],"platform":[248],"CPU-GPU":[251],"platform.":[252],"Compared":[253],"single-stream":[256],"version,":[257],"achieves,":[260],"average,":[262],"1.6x":[264],"1.1x":[266],"speedup":[267],"XeonPhi":[270],"GPU":[273],"platform,":[274],"respectively.":[275],"These":[276],"results":[277],"translate":[278],"over":[280],"93":[281],"percent":[282],"delivered":[286],"theoretically":[289],"perfect":[290],"predictor.":[291]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-03-13T00:00:00"}
