{"id":"https://openalex.org/W4402502698","doi":"https://doi.org/10.48550/arxiv.2408.10143","title":"Data-Driven Analysis to Understand GPU Hardware Resource Usage of Optimizations","display_name":"Data-Driven Analysis to Understand GPU Hardware Resource Usage of Optimizations","publication_year":2024,"publication_date":"2024-08-19","ids":{"openalex":"https://openalex.org/W4402502698","doi":"https://doi.org/10.48550/arxiv.2408.10143"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2408.10143","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.10143","pdf_url":"https://arxiv.org/pdf/2408.10143","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2408.10143","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002465410","display_name":"Tanzima Islam","orcid":"https://orcid.org/0000-0003-2877-5871"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Islam, Tanzima Z.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070686608","display_name":"Aniruddha Marathe","orcid":"https://orcid.org/0000-0003-0546-4472"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marathe, Aniruddha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109864509","display_name":"Holland Schutte","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schutte, Holland","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5106737504","display_name":"Mohammad Zaeed","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zaeed, Mohammad","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5002465410"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.968500018119812,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.968500018119812,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9362999796867371,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6980834603309631},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.547198474407196},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5120871067047119},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4760948419570923},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4348275661468506},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4144169092178345},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.41354307532310486},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3426669239997864},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.11836853623390198}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6980834603309631},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.547198474407196},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5120871067047119},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4760948419570923},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4348275661468506},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4144169092178345},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.41354307532310486},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3426669239997864},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.11836853623390198},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2408.10143","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.10143","pdf_url":"https://arxiv.org/pdf/2408.10143","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2408.10143","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2408.10143","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2408.10143","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.10143","pdf_url":"https://arxiv.org/pdf/2408.10143","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W3213381848","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2370314112","https://openalex.org/W1912958759"],"abstract_inverted_index":{"With":[0],"heterogeneous":[1],"systems,":[2],"the":[3,28,56,141,148,157],"number":[4],"of":[5,113,143],"GPUs":[6,26],"per":[7],"chip":[8],"increases":[9],"to":[10,30,90,99,151,167],"provide":[11],"computational":[12],"capabilities":[13],"for":[14,24,34,42,116,126],"solving":[15],"science":[16],"at":[17],"a":[18,63,87,117,152],"nanoscopic":[19],"scale.":[20],"However,":[21],"low":[22],"utilization":[23,102,161],"single":[25],"defies":[27],"need":[29],"invest":[31],"more":[32],"money":[33],"expensive":[35],"ccelerators.":[36],"While":[37],"related":[38],"work":[39],"develops":[40],"optimizations":[41,50,115,150],"improving":[43],"application":[44,104],"performance,":[45],"none":[46],"studies":[47],"how":[48,74],"these":[49],"impact":[51],"hardware":[52,75,109,134],"resource":[53,76,110,135],"usage":[54,77,111,136],"or":[55,83],"average":[57],"GPU":[58],"utilization.":[59],"This":[60],"paper":[61],"takes":[62],"data-driven":[64],"analysis":[65],"approach":[66],"in":[67],"addressing":[68],"this":[69],"gap":[70],"by":[71,146,165],"(1)":[72],"characterizing":[73],"affects":[78],"device":[79,101,160],"utilization,":[80],"execution":[81,158],"time,":[82,159],"both,":[84],"(2)":[85],"presenting":[86],"multi-objective":[88],"metric":[89],"identify":[91],"important":[92],"application-device":[93],"interactions":[94],"that":[95],"can":[96],"be":[97],"optimized":[98],"improve":[100],"and":[103,120,162,170],"performance":[105],"jointly,":[106],"(3)":[107],"studying":[108],"behaviors":[112],"several":[114,127],"benchmark":[118],"application,":[119,154],"finally":[121],"(4)":[122],"identifying":[123],"optimization":[124],"opportunities":[125],"scientific":[128],"proxy":[129,153],"applications":[130],"based":[131],"on":[132],"their":[133],"behaviors.":[137],"Furthermore,":[138],"we":[139],"demonstrate":[140],"applicability":[142],"our":[144],"methodology":[145],"applying":[147],"identified":[149],"which":[155],"improves":[156],"power":[163],"consumption":[164],"up":[166],"29.6%,":[168],"5.3%":[169],"26.5%":[171],"respectively.":[172]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
