{"id":"https://openalex.org/W3190367077","doi":"https://doi.org/10.13016/kwbs-up51","title":"On Efficient GPGPU Computing for Integrated Heterogeneous CPU-GPU Microprocessors","display_name":"On Efficient GPGPU Computing for Integrated Heterogeneous CPU-GPU Microprocessors","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3190367077","doi":"https://doi.org/10.13016/kwbs-up51","mag":"3190367077"},"language":"en","primary_location":{"id":"mag:3190367077","is_oa":false,"landing_page_url":"https://drum.lib.umd.edu/handle/1903/27407","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"type":"dissertation","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.13016/kwbs-up51","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087383979","display_name":"Daniel Gerzhoy","orcid":"https://orcid.org/0000-0002-4277-9994"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gerzhoy, Daniel","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5087383979"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9815999865531921,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9621000289916992,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.8264445066452026},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7414771318435669},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7401496171951294},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6969698071479797},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.48000138998031616},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.44220811128616333},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.4289221465587616},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3772912621498108},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.183921217918396},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.17720603942871094}],"concepts":[{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.8264445066452026},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7414771318435669},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7401496171951294},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6969698071479797},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.48000138998031616},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.44220811128616333},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4289221465587616},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3772912621498108},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.183921217918396},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.17720603942871094}],"mesh":[],"locations_count":2,"locations":[{"id":"mag:3190367077","is_oa":false,"landing_page_url":"https://drum.lib.umd.edu/handle/1903/27407","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null},{"id":"doi:10.13016/kwbs-up51","is_oa":true,"landing_page_url":"https://doi.org/10.13016/kwbs-up51","pdf_url":null,"source":{"id":"https://openalex.org/S4306402644","display_name":"Digital Repository at the University of Maryland (University of Maryland College Park)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I66946132","host_organization_name":"University of Maryland, College Park","host_organization_lineage":["https://openalex.org/I66946132"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.13016/kwbs-up51","is_oa":true,"landing_page_url":"https://doi.org/10.13016/kwbs-up51","pdf_url":null,"source":{"id":"https://openalex.org/S4306402644","display_name":"Digital Repository at the University of Maryland (University of Maryland College Park)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I66946132","host_organization_name":"University of Maryland, College Park","host_organization_lineage":["https://openalex.org/I66946132"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.8299999833106995,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3007153607","https://openalex.org/W2350289853","https://openalex.org/W3169672792","https://openalex.org/W3083871806","https://openalex.org/W3015575992","https://openalex.org/W1997792613","https://openalex.org/W2000335122","https://openalex.org/W1773424846","https://openalex.org/W1552720876","https://openalex.org/W2483751807","https://openalex.org/W2599117486","https://openalex.org/W2389629787","https://openalex.org/W2165750519","https://openalex.org/W2910091684","https://openalex.org/W2015048865","https://openalex.org/W2523457297","https://openalex.org/W2533382481","https://openalex.org/W2989919840","https://openalex.org/W2011544816","https://openalex.org/W2364044215"],"abstract_inverted_index":{"Heterogeneous":[0,81],"microprocessors":[1,347],"which":[2,88],"integrate":[3],"a":[4,9,23,57,99,215,236,257,288,316],"CPU":[5,119,159,198,217],"and":[6,16,62,160,189,199,244,254,268,313,328,331],"GPU":[7,26,138,161,200],"on":[8,56,75,116,162,187,275,343],"single":[10],"chip":[11],"provide":[12,315],"low-overhead":[13],"CPU-GPU":[14,59,232,346],"communication":[15],"permit":[17],"sharing":[18,172,193],"of":[19,39,121,130,149,167,173,202,214,251,284,291,307,318],"on-chip":[20,175],"resources":[21],"that":[22,41,103,183,213,324],"traditional":[24],"discrete":[25,51],"would":[27,43],"not":[28],"have":[29,186],"direct":[30],"access":[31],"to.":[32],"These":[33],"features":[34],"allow":[35,278],"for":[36,47,80,234,239],"the":[37,117,122,131,135,143,150,158,163,171,174,181,197,249,252,272,279,285,301,305,329],"optimization":[38],"codes":[40],"heretofore":[42],"be":[44,54,223,262,340],"suitable":[45],"only":[46],"multi-core":[48],"CPUs":[49,243],"or":[50,90,294],"GPUs":[52],"to":[53,222,281,334],"run":[55,282],"heterogeneous":[58,154,345],"microprocessor":[60],"efficiently":[61],"in":[63,87,256,304,309],"some":[64],"cases-":[65],"with":[66,109,325],"increased":[67],"performance.":[68,269],"This":[69],"thesis":[70,151],"discusses":[71],"previously":[72,349],"published":[73],"work":[74,308],"exploiting":[76],"nested":[77,97],"MIMD-SIMD":[78],"Parallelization":[79],"microprocessors.":[82],"We":[83,229,322],"examined":[84],"loop":[85,102,133,292],"structures":[86],"one":[89],"more":[91,337],"regular":[92],"data":[93,192,209,221,274],"parallel":[94,100],"loops":[95,115],"are":[96],"within":[98],"outer":[101,114,144],"can":[104,261,339],"contain":[105],"irregular":[106],"code":[107],"(e.g.,":[108],"control":[110],"divergence).":[111],"By":[112,246],"scheduling":[113,248,306,327],"multicore":[118],"part":[120],"microprocessor,":[123],"each":[124],"thread":[125],"launches":[126],"dynamic,":[127],"independent":[128],"instances":[129],"inner":[132],"onto":[134],"GPU,":[136],"boosting":[137],"utilization":[139],"while":[140],"simultaneously":[141],"parallelizing":[142],"loop.":[145],"The":[146],"second":[147],"portion":[148],"proposal":[152],"explores":[153],"producer-consumer":[155,240],"data-sharing":[156],"between":[157,196,242],"microprocessor.":[164],"One":[165],"advantage":[166],"tight":[168],"integration":[169],"--":[170,178],"cache":[176],"system":[177],"could":[179],"improve":[180],"impact":[182],"memory":[184],"accesses":[185],"performance":[188],"power.":[190],"Producer-consumer":[191],"commonly":[194],"occurs":[195],"portions":[201],"programs,":[203],"but":[204],"large":[205],"kernel":[206],"sizes":[207],"whose":[208],"footprint":[210],"far":[211],"exceeds":[212],"typical":[216],"cache,":[218],"cause":[219],"shared":[220],"evicted":[224],"before":[225],"it":[226],"is":[227],"reused.":[228],"propose":[230],"Pipelined":[231],"Scheduling":[233],"Caches,":[235],"locality":[237],"transformation":[238],"relationships":[241],"GPUs.":[245],"intelligently":[247],"execution":[250],"producer":[253,280],"consumer":[255,286],"software":[258,311,332],"pipeline,":[259,312],"evictions":[260],"avoided,":[263],"saving":[264],"DRAM":[265],"accesses,":[266],"power,":[267],"To":[270],"keep":[271],"cached":[273],"chip,":[276],"we":[277,314],"ahead":[283],"by":[287],"certain":[289],"amount":[290],"iterations":[293],"threads.":[295],"Choosing":[296],"this":[297,310],"\"run-ahead":[298],"distance\"":[299],"becomes":[300],"main":[302],"constraint":[303],"method":[317],"statically":[319],"predicting":[320],"it.":[321],"assert":[323],"intelligent":[326],"hardware":[330],"mechanisms":[333],"support":[335],"it,":[336],"workloads":[338],"gainfully":[341],"executed":[342],"integrated":[344],"than":[348],"assumed.":[350]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
