{"id":"https://openalex.org/W4308083911","doi":"https://doi.org/10.1109/micro56248.2022.00044","title":"GCD2: A Globally Optimizing Compiler for Mapping DNNs to Mobile DSPs","display_name":"GCD2: A Globally Optimizing Compiler for Mapping DNNs to Mobile DSPs","publication_year":2022,"publication_date":"2022-10-01","ids":{"openalex":"https://openalex.org/W4308083911","doi":"https://doi.org/10.1109/micro56248.2022.00044"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro56248.2022.00044","pdf_url":null,"source":null,"license":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043054935","display_name":"Wei Niu","orcid":"https://orcid.org/0000-0002-2697-7042"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Niu","raw_affiliation_string":"William & Mary,USA","raw_affiliation_strings":["William & Mary,USA"]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000582412","display_name":"Jiexiong Guan","orcid":null},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiexiong Guan","raw_affiliation_string":"William & Mary,USA","raw_affiliation_strings":["William & Mary,USA"]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012435102","display_name":"Xipeng Shen","orcid":"https://orcid.org/0000-0003-4551-186X"},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535","https://openalex.org/I4210158053"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xipeng Shen","raw_affiliation_string":"North Carolina State University,USA","raw_affiliation_strings":["North Carolina State University,USA"]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025596795","display_name":"Yanzhi Wang","orcid":"https://orcid.org/0000-0003-4767-7230"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanzhi Wang","raw_affiliation_string":"Northeastern University,USA","raw_affiliation_strings":["Northeastern University,USA"]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025342178","display_name":"Gagan Agrawal","orcid":null},"institutions":[{"id":"https://openalex.org/I25041050","display_name":"Augusta University","ror":"https://ror.org/012mef835","country_code":"US","type":"education","lineage":["https://openalex.org/I1289702989","https://openalex.org/I25041050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gagan Agrawal","raw_affiliation_string":"Augusta University, USA","raw_affiliation_strings":["Augusta University, USA"]},{"author_position":"last","author":{"id":"https://openalex.org/A5039372748","display_name":"Bin Ren","orcid":"https://orcid.org/0000-0001-9563-7175"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bin Ren","raw_affiliation_string":"William & Mary,USA","raw_affiliation_strings":["William & Mary,USA"]}],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"has_fulltext":false,"cited_by_count":0,"cited_by_percentile_year":{"min":0,"max":69},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Performance Optimization","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Performance Optimization","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Deep Learning in Computer Vision and Image Recognition","score":0.9979,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric Devices for Low-Power Nanoscale Applications","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"keyword":"mapping dnns","score":0.5841},{"keyword":"compiler","score":0.4611}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8087914},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7900938},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.74513835},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5556766},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5451443},{"id":"https://openalex.org/C170595534","wikidata":"https://www.wikidata.org/wiki/Q249743","display_name":"Very long instruction word","level":2,"score":0.4961222},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.26327574}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro56248.2022.00044","pdf_url":null,"source":null,"license":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[{"funder":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation","award_id":null},{"funder":"https://openalex.org/F4320316514","funder_display_name":"Arm","award_id":null}],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1486493261","https://openalex.org/W2094969226","https://openalex.org/W1497412580","https://openalex.org/W1606391442","https://openalex.org/W2882144","https://openalex.org/W2372381897","https://openalex.org/W2039534605","https://openalex.org/W1584265037","https://openalex.org/W2008876287","https://openalex.org/W1606851719"],"ngrams_url":"https://api.openalex.org/works/W4308083911/ngrams","abstract_inverted_index":{"More":[0],"specialized":[1],"chips":[2],"are":[3,137],"exploiting":[4,52],"available":[5],"high":[6],"transistor":[7],"density":[8],"to":[9,34,56,105,182,198,229],"expose":[10],"parallelism":[11],"at":[12],"a":[13,25,100,119,140,232],"large":[14,153],"scale":[15],"with":[16,66,130],"more":[17,59],"intricate":[18],"instruction":[19,109],"sets.":[20],"This":[21],"paper":[22],"reports":[23],"on":[24,42,231],"compilation":[26,142],"system":[27,143],"GCD":[28,72,160,208],"2":[31,75,163,211],",":[32],"developed":[33],"support":[35,87,177],"complex":[36,60],"Deep":[37],"Neural":[38],"Network":[39],"(DNN)":[40],"workloads":[41],"mobile":[43,178,233],"DSP":[44,234],"chips.":[45],"We":[46],"observe":[47],"several":[48],"challenges":[49],"in":[50,118,139,215],"fully":[51],"this":[53],"architecture,":[54],"related":[55,104],"SIMD":[57,93],"width,":[58],"SIMD/vector":[61],"instructions,":[62,94],"and":[63,97,122,173,187,194,203],"VLIW":[64],"pipeline":[65],"the":[67,77,88,107,236],"notion":[68],"of":[69,82,90,99,115,219],"soft":[70,133],"dependencies.":[71,134],"comprises":[76],"following":[78],"contributions:":[79],"1)":[80],"development":[81],"matrix":[83],"layout":[84],"formats":[85],"that":[86,144,159,176],"use":[89],"different":[91],"novel":[92],"2)":[95],"formulation":[96],"solution":[98],"global":[101],"optimization":[102],"problem":[103],"choosing":[106],"best":[108],"(and":[110],"associated":[111],"layout)":[112],"for":[113,127,132,235],"implementation":[114,224],"each":[116],"operator":[117],"complete":[120,141],"DNN,":[121],"3)":[123],"SDA,":[124],"an":[125],"algorithm":[126],"packing":[128],"instructions":[129],"consideration":[131],"These":[135],"solutions":[136],"incorporated":[138],"is":[145,212],"extensively":[146],"evaluated":[147],"against":[148],"other":[149],"systems":[150],"using":[151],"10":[152],"DNN":[154,169],"models.":[155],"Evaluation":[156],"results":[157],"show":[158],"outperforms":[164,188],"two":[165,226],"product-level":[166],"state-of-the-art":[167],"end-to-end":[168],"execution":[170,218],"frameworks":[171],"(TFLite":[172],"Qualcomm":[174],"SNPE)":[175],"DSPs":[179],"by":[180,196],"up":[181,197],"$":[183],"6.0":[184],"\\times$":[185,202,205],"speedup,":[186,206],"three":[189],"established":[190],"compilers":[191],"(Halide,":[192],"TVM,":[193],"RAKE)":[195],"$4.5":[199],"\\times,":[200],"3.4":[201],"$4.0":[204],"respectively.":[207],"also":[213],"unique":[214],"supporting,":[216],"real-time":[217],"certain":[220],"DNNs,":[221],"while":[222],"its":[223],"enables":[225],"major":[227],"DNNs":[228],"execute":[230],"first":[237],"time.":[238]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4308083911","counts_by_year":[],"updated_date":"2024-03-01T18:03:21.018692","created_date":"2022-11-08"}