{"id":"https://openalex.org/W3007696333","doi":"https://doi.org/10.1145/3368826.3377917","title":"PreScaler: an efficient system-aware precision scaling framework on heterogeneous systems","display_name":"PreScaler: an efficient system-aware precision scaling framework on heterogeneous systems","publication_year":2020,"publication_date":"2020-02-21","ids":{"openalex":"https://openalex.org/W3007696333","doi":"https://doi.org/10.1145/3368826.3377917","mag":"3007696333"},"language":"en","primary_location":{"id":"doi:10.1145/3368826.3377917","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3368826.3377917","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM/IEEE International Symposium on Code Generation and Optimization","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019899003","display_name":"Seokwon Kang","orcid":"https://orcid.org/0000-0003-0017-592X"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Seokwon Kang","raw_affiliation_strings":["Hanyang University, South Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University, South Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070417804","display_name":"Kyunghwan Choi","orcid":"https://orcid.org/0000-0002-4832-1597"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kyunghwan Choi","raw_affiliation_strings":["Hanyang University, South Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University, South Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086980677","display_name":"Yongjun Park","orcid":"https://orcid.org/0000-0003-3725-0380"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yongjun Park","raw_affiliation_strings":["Hanyang University, South Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University, South Korea","institution_ids":["https://openalex.org/I4575257"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5019899003"],"corresponding_institution_ids":["https://openalex.org/I4575257"],"apc_list":null,"apc_paid":null,"fwci":0.4621,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.55850755,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"48","issue":null,"first_page":"280","last_page":"292"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8292303085327148},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6021851897239685},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.44714105129241943},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.42186230421066284},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.41512182354927063},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.41319364309310913},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.36432820558547974},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3479745090007782},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.34349098801612854},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10209915041923523}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8292303085327148},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6021851897239685},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.44714105129241943},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.42186230421066284},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.41512182354927063},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.41319364309310913},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.36432820558547974},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3479745090007782},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.34349098801612854},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10209915041923523},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3368826.3377917","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3368826.3377917","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM/IEEE International Symposium on Code Generation and Optimization","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1492552760","https://openalex.org/W1982825626","https://openalex.org/W1992748711","https://openalex.org/W2005487033","https://openalex.org/W2014423307","https://openalex.org/W2026764611","https://openalex.org/W2061686014","https://openalex.org/W2105035536","https://openalex.org/W2114703523","https://openalex.org/W2117539524","https://openalex.org/W2170881177","https://openalex.org/W2265166184","https://openalex.org/W2411972054","https://openalex.org/W2798668634","https://openalex.org/W2914825488","https://openalex.org/W2915915007","https://openalex.org/W2945876280","https://openalex.org/W4231156094","https://openalex.org/W4232955467","https://openalex.org/W4234178886","https://openalex.org/W4242502357","https://openalex.org/W4245124044","https://openalex.org/W4246166885","https://openalex.org/W4247192460","https://openalex.org/W4251384337","https://openalex.org/W6651700774"],"related_works":["https://openalex.org/W141820298","https://openalex.org/W2049584446","https://openalex.org/W2079781215","https://openalex.org/W4378770497","https://openalex.org/W4308245303","https://openalex.org/W2014033564","https://openalex.org/W2910573937","https://openalex.org/W4385571583","https://openalex.org/W4389519396","https://openalex.org/W4391382578"],"abstract_inverted_index":{"Graphics":[0],"processing":[1,17],"units":[2],"(GPUs)":[3],"have":[4,81],"been":[5,58,82],"commonly":[6],"utilized":[7],"to":[8,25,30,73,164],"accelerate":[9],"multiple":[10],"emerging":[11],"applications,":[12],"such":[13],"as":[14],"big":[15],"data":[16,99],"and":[18,103,167,178,197],"machine":[19],"learning.":[20],"While":[21],"GPUs":[22],"are":[23],"proven":[24],"be":[26,160],"effective,":[27],"approximate":[28],"computing,":[29],"trade":[31],"off":[32],"performance":[33,45,86,139,217],"with":[34],"accuracy,":[35],"is":[36,88,95,154],"one":[37],"of":[38,49,78,98,116,187,219],"the":[39,59,96,117,137,141,156,185,192,203,222,226],"most":[40,60],"common":[41],"solutions":[42],"for":[43],"further":[44],"improvement.":[46],"Precision":[47],"scaling":[48,110,131],"originally":[50],"high-precision":[51],"values":[52,55],"into":[53],"lower-precision":[54],"has":[56],"recently":[57],"widely":[61],"used":[62],"GPU-side":[63,79],"approximation":[64],"technique,":[65],"including":[66],"hardware-level":[67],"half-precision":[68],"support.":[69],"Although":[70],"several":[71],"approaches":[72],"find":[74],"optimal":[75],"mixed-precision":[76,207],"configuration":[77,158,208],"kernels":[80],"introduced,":[83],"total":[84,92],"program":[85,138,149],"gain":[87,218],"often":[89],"low":[90],"because":[91],"execution":[93],"time":[94],"combination":[97],"transfer,":[100],"type":[101],"conversion,":[102],"kernel":[104,118],"execution.":[105],"As":[106],"a":[107,210],"result,":[108],"kernel-level":[109],"may":[111],"incur":[112],"high":[113],"type-conversion":[114],"overhead":[115],"input/output":[119],"data.":[120],"To":[121],"address":[122],"this":[123,125,172],"problem,":[124],"paper":[126],"proposes":[127],"an":[128,215],"automatic":[129],"precision":[130],"framework":[132],"called":[133],"PreScaler":[134,170,213],"that":[135,155],"maximizes":[136],"at":[140],"memory":[142],"object":[143],"level":[144,206],"by":[145],"considering":[146],"whole":[147],"OpenCL":[148],"flows.":[150],"The":[151],"main":[152],"difficulty":[153],"best":[157,204],"cannot":[159],"easily":[161],"predicted":[162],"due":[163],"various":[165],"application-":[166],"system-specific":[168],"characteristics.":[169],"solves":[171],"problem":[173],"using":[174,209],"search":[175,180],"space":[176],"minimization":[177],"decision-tree-based":[179,211],"processes.":[181],"First,":[182],"it":[183,201],"minimizes":[184],"number":[186],"test":[188],"configurations":[189],"based":[190],"on":[191],"information":[193],"from":[194],"system":[195],"inspection":[196],"dynamic":[198],"profiling.":[199],"Then,":[200],"finds":[202],"memory-object":[205],"search.":[212],"achieves":[214],"average":[216],"1.33x":[220],"over":[221],"baseline":[223],"while":[224],"maintaining":[225],"target":[227],"output":[228],"quality":[229],"level.":[230]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
