{"id":"https://openalex.org/W3113972533","doi":"https://doi.org/10.1109/wcsp49889.2020.9299844","title":"An Optimization Toolchain Design of Deep Learning Deployment Based on Heterogeneous Computing Platform","display_name":"An Optimization Toolchain Design of Deep Learning Deployment Based on Heterogeneous Computing Platform","publication_year":2020,"publication_date":"2020-10-21","ids":{"openalex":"https://openalex.org/W3113972533","doi":"https://doi.org/10.1109/wcsp49889.2020.9299844","mag":"3113972533"},"language":"en","primary_location":{"id":"doi:10.1109/wcsp49889.2020.9299844","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wcsp49889.2020.9299844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Conference on Wireless Communications and Signal Processing (WCSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101861318","display_name":"Jun Yin","orcid":"https://orcid.org/0000-0002-2159-0658"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yin","raw_affiliation_strings":["Fudan University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066674438","display_name":"Jun Han","orcid":"https://orcid.org/0000-0002-5245-0754"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun han","raw_affiliation_strings":["Fudan University, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100376221","display_name":"Xiaodong Zhang","orcid":"https://orcid.org/0000-0002-8380-1019"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaodong Zhang","raw_affiliation_strings":["Sylincom Technology Co. Ltd, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sylincom Technology Co. Ltd, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1870686,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"631","last_page":"635"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/toolchain","display_name":"Toolchain","score":0.9350264668464661},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.8123221397399902},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7959758043289185},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6126473546028137},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5629570484161377},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5217338800430298},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5060300230979919},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4908352494239807},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.47333863377571106},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.44004976749420166},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42866241931915283},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4146995544433594},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.34502941370010376},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.33486127853393555},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.27742844820022583},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.21965724229812622},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.15013381838798523},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11012792587280273},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10994479060173035}],"concepts":[{"id":"https://openalex.org/C2777062904","wikidata":"https://www.wikidata.org/wiki/Q545406","display_name":"Toolchain","level":3,"score":0.9350264668464661},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.8123221397399902},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7959758043289185},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6126473546028137},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5629570484161377},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5217338800430298},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5060300230979919},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4908352494239807},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.47333863377571106},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.44004976749420166},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42866241931915283},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4146995544433594},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.34502941370010376},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.33486127853393555},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.27742844820022583},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.21965724229812622},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.15013381838798523},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11012792587280273},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10994479060173035},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wcsp49889.2020.9299844","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wcsp49889.2020.9299844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Conference on Wireless Communications and Signal Processing (WCSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1980208272","https://openalex.org/W2010387036","https://openalex.org/W2055312318","https://openalex.org/W2096070062","https://openalex.org/W2103742924","https://openalex.org/W2135653967","https://openalex.org/W2417350949","https://openalex.org/W2528784626","https://openalex.org/W2590246587","https://openalex.org/W2612445135","https://openalex.org/W2787513823","https://openalex.org/W3136479147","https://openalex.org/W4251637954","https://openalex.org/W4297775537","https://openalex.org/W6680007323","https://openalex.org/W6737664043","https://openalex.org/W6748408460"],"related_works":["https://openalex.org/W2013037783","https://openalex.org/W2909413202","https://openalex.org/W4385243142","https://openalex.org/W1999008563","https://openalex.org/W2561644314","https://openalex.org/W2912135124","https://openalex.org/W2794118724","https://openalex.org/W4206450104","https://openalex.org/W3082104242","https://openalex.org/W4391382578"],"abstract_inverted_index":{"Progress":[0],"in":[1,38,48,61],"co-processor":[2],"acceleration":[3,30],"has":[4],"enabled":[5],"fast":[6],"and":[7,90,103],"high-performance":[8],"deployment":[9],"of":[10],"intensive":[11],"computation":[12],"applications":[13],"such":[14],"as":[15],"deep":[16,95],"learning":[17,96],"algorithms.":[18],"Recently,":[19],"new":[20],"heterogeneous":[21,49],"cooperation":[22,50],"patterns":[23],"have":[24],"been":[25],"studied":[26],"to":[27,59],"produce":[28],"further":[29],"on":[31,84,106],"discrete":[32],"or":[33],"large-scale":[34],"computing":[35],"systems.":[36],"Hence,":[37],"this":[39],"paper,":[40],"we":[41,79],"explore":[42],"auto-tuning":[43],"strategies":[44],"for":[45,94],"task":[46],"scheduling":[47],"between":[51],"kernel":[52,65],"operators.":[53],"The":[54],"tuning":[55],"outcomes":[56],"are":[57],"designed":[58],"work":[60,111],"harmony":[62],"with":[63,75,99],"the":[64,69,76,85,91,100,107],"operators":[66],"optimized":[67],"by":[68],"TVM":[70,102],"compiler.":[71],"To":[72],"hook":[73],"up":[74],"mainstream":[77],"frameworks,":[78],"build":[80],"a":[81,113],"toolchain":[82],"based":[83],"Heterogeneous":[86],"System":[87],"Architecture":[88],"(HSA)":[89],"ROCM":[92,108],"platform":[93],"deployment.":[97],"Compared":[98],"original":[101],"official":[104],"TensorFlow":[105],"platform,":[109],"our":[110],"achieves":[112],"higher":[114],"inference":[115],"speed.":[116]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
