{"id":"https://openalex.org/W3163183483","doi":"https://doi.org/10.1109/tpds.2021.3078254","title":"An Incremental Iterative Acceleration Architecture in Distributed Heterogeneous Environments With GPUs for Deep Learning","display_name":"An Incremental Iterative Acceleration Architecture in Distributed Heterogeneous Environments With GPUs for Deep Learning","publication_year":2021,"publication_date":"2021-05-07","ids":{"openalex":"https://openalex.org/W3163183483","doi":"https://doi.org/10.1109/tpds.2021.3078254","mag":"3163183483"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2021.3078254","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2021.3078254","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100646177","display_name":"Xuedong Zhang","orcid":"https://orcid.org/0000-0003-2456-2342"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xuedong Zhang","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, National Supercomputing Center in Changsha, Hunan University, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0003-2456-2342","affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, National Supercomputing Center in Changsha, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008574161","display_name":"Zhuo Tang","orcid":"https://orcid.org/0000-0001-9081-8153"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]},{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuo Tang","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, National Supercomputing Center in Changsha, Hunan University, Changsha, China","Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0001-9081-8153","affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, National Supercomputing Center in Changsha, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]},{"raw_affiliation_string":"Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062552066","display_name":"Lifan Du","orcid":null},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lifan Du","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, National Supercomputing Center in Changsha, Hunan University, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, National Supercomputing Center in Changsha, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101863951","display_name":"Li Yang","orcid":"https://orcid.org/0000-0002-8929-7554"},"institutions":[{"id":"https://openalex.org/I56934997","display_name":"Changsha University of Science and Technology","ror":"https://ror.org/03yph8055","country_code":"CN","type":"education","lineage":["https://openalex.org/I56934997"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Yang","raw_affiliation_strings":["College of Computer and Communication Engineering, Changsha University of Science and Technology, Hunan, China"],"raw_orcid":"https://orcid.org/0000-0002-8929-7554","affiliations":[{"raw_affiliation_string":"College of Computer and Communication Engineering, Changsha University of Science and Technology, Hunan, China","institution_ids":["https://openalex.org/I56934997"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100646177"],"corresponding_institution_ids":["https://openalex.org/I16609230"],"apc_list":null,"apc_paid":null,"fwci":0.6412,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.70430089,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"32","issue":"11","first_page":"2823","last_page":"2837"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8973193764686584},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7190062999725342},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5553275942802429},{"id":"https://openalex.org/keywords/iterative-method","display_name":"Iterative method","score":0.5017054080963135},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4820452630519867},{"id":"https://openalex.org/keywords/iterative-and-incremental-development","display_name":"Iterative and incremental development","score":0.42822128534317017},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.42613011598587036},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.36125561594963074},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1963290274143219}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8973193764686584},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7190062999725342},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5553275942802429},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.5017054080963135},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4820452630519867},{"id":"https://openalex.org/C143587482","wikidata":"https://www.wikidata.org/wiki/Q1543216","display_name":"Iterative and incremental development","level":2,"score":0.42822128534317017},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.42613011598587036},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.36125561594963074},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1963290274143219},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2021.3078254","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2021.3078254","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.5699999928474426}],"awards":[{"id":"https://openalex.org/G2833253907","display_name":null,"funder_award_id":"2017YFB0202201","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G3919970536","display_name":null,"funder_award_id":"2018YFB1701400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G3967896673","display_name":null,"funder_award_id":"2018YFB0203804","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G4914998227","display_name":null,"funder_award_id":"62002114","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6599199956","display_name":null,"funder_award_id":"CKCEST-2020-2-5","funder_id":"https://openalex.org/F4320327609","funder_display_name":"China Knowledge Centre for Engineering Sciences and Technology"},{"id":"https://openalex.org/G8583758428","display_name":null,"funder_award_id":"92055213","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8607382365","display_name":null,"funder_award_id":"L1924056","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8752286753","display_name":"\u9ad8\u6027\u80fd\u5f02\u6784\u8ba1\u7b97\u73af\u5883\u4e2d\u7684\u6df1\u5ea6\u5b66\u4e60\u8fed\u4ee3\u4f18\u5316\u53ca\u5de5\u4e1a\u5e94\u7528\u7814\u7a76","funder_award_id":"61873090","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320327609","display_name":"China Knowledge Centre for Engineering Sciences and Technology","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W410850256","https://openalex.org/W1502544429","https://openalex.org/W1504291959","https://openalex.org/W1991126176","https://openalex.org/W2009127573","https://openalex.org/W2010929544","https://openalex.org/W2053744175","https://openalex.org/W2082171780","https://openalex.org/W2110975861","https://openalex.org/W2134483114","https://openalex.org/W2139605600","https://openalex.org/W2157729530","https://openalex.org/W2162390675","https://openalex.org/W2173213060","https://openalex.org/W2414493891","https://openalex.org/W2465698270","https://openalex.org/W2584612194","https://openalex.org/W2728765559","https://openalex.org/W2769815320","https://openalex.org/W2778396048","https://openalex.org/W2911184449","https://openalex.org/W2928052000","https://openalex.org/W2949866178","https://openalex.org/W2963288913","https://openalex.org/W2963390429","https://openalex.org/W2963566954","https://openalex.org/W2972427306","https://openalex.org/W2975712713","https://openalex.org/W2978742436","https://openalex.org/W2997133695","https://openalex.org/W2998062016","https://openalex.org/W3013018092","https://openalex.org/W3013976177","https://openalex.org/W3031432445","https://openalex.org/W3047600804","https://openalex.org/W3099324350","https://openalex.org/W3101558675","https://openalex.org/W4248767818","https://openalex.org/W4295830359","https://openalex.org/W6614148910","https://openalex.org/W6629990375","https://openalex.org/W6683722107","https://openalex.org/W6733201301","https://openalex.org/W6747481501","https://openalex.org/W7038512471"],"related_works":["https://openalex.org/W2498640783","https://openalex.org/W2022562732","https://openalex.org/W1495035728","https://openalex.org/W2034475059","https://openalex.org/W2026748623","https://openalex.org/W2758781407","https://openalex.org/W2185389722","https://openalex.org/W2528330837","https://openalex.org/W2170974505","https://openalex.org/W2360231538"],"abstract_inverted_index":{"The":[0,73,111],"parallel":[1,67],"computing":[2,48,58,68],"capabilities":[3],"of":[4,19,38,47,65,136,152,158,232],"GPUs":[5],"have":[6],"a":[7,35,53,142,174],"significant":[8],"impact":[9],"on":[10,102,127,178,202],"computationally":[11,80,213],"intensive":[12,81,214],"iterative":[13,40,45,57,82,91,181,215,224,233],"tasks.":[14,49],"Offloading":[15],"part":[16],"or":[17],"all":[18],"the":[20,25,28,44,128,134,149,156,163,198,222,230],"deep":[21,76],"learning":[22,77],"tasks":[23],"from":[24],"CPU":[26],"to":[27,120,132,185],"GPU":[29,70,112],"for":[30,123],"execution":[31],"is":[32,118,183],"mainstream.":[33],"However,":[34],"large":[36],"number":[37],"redundant":[39,90],"calculations":[41],"exist":[42],"in":[43,192],"process":[46],"Therefore,":[50],"we":[51,104,140,196],"propose":[52,105,141],"GPU-based":[54],"distributed":[55,66],"incremental":[56,113,180,223],"architecture":[59,74,200,226],"that":[60,221],"can":[61,227],"make":[62],"full":[63],"use":[64],"and":[69,78,88,98,155,170,189,205],"memory":[71,100,138,169],"structure.":[72],"supports":[75],"other":[79],"applications":[83,216],"by":[84,161],"optimizing":[85],"data":[86,96,109,159,187],"placement":[87],"reducing":[89],"calculations.":[92],"To":[93],"support":[94,186],"block-based":[95],"partitioning":[97],"coalesced":[99],"access":[101,154,160],"GPUs,":[103],"GDataSet,":[106],"an":[107,179],"abstract":[108],"set.":[110],"iteration":[114],"manager":[115],"called":[116],"GTracker":[117],"designed":[119,184],"be":[121],"responsible":[122],"GDataSet":[124],"cache":[125,153],"management":[126],"GPU.":[129],"In":[130],"order":[131],"solve":[133],"limitation":[135],"on-chip":[137,168],"size,":[139],"variable":[143],"sliding":[144],"window":[145],"mechanism.":[146],"It":[147],"improves":[148],"hit":[150],"rate":[151],"speed":[157],"realizing":[162],"best":[164],"block":[165],"arrangement":[166],"between":[167],"off-chip":[171],"memory.":[172],"Besides,":[173],"communication":[175,191],"channel":[176],"based":[177,201],"model":[182],"transmission":[188],"task":[190],"cluster":[193],"computing.":[194,234],"Finally,":[195],"implement":[197],"proposed":[199],"Spark":[203],"2.4.1":[204],"CUDA":[206],"10.0.":[207],"Comparative":[208],"experiments":[209],"with":[210],"widely":[211],"used":[212],"(K-means,":[217],"LSTM,":[218],"etc.)":[219],"show":[220],"acceleration":[225],"significantly":[228],"improve":[229],"efficiency":[231]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":2}],"updated_date":"2026-06-06T09:05:17.133730","created_date":"2025-10-10T00:00:00"}
