{"id":"https://openalex.org/W2775799034","doi":"https://doi.org/10.1109/iiswc.2017.8167777","title":"Moka: Model-based concurrent kernel analysis","display_name":"Moka: Model-based concurrent kernel analysis","publication_year":2017,"publication_date":"2017-10-01","ids":{"openalex":"https://openalex.org/W2775799034","doi":"https://doi.org/10.1109/iiswc.2017.8167777","mag":"2775799034"},"language":"en","primary_location":{"id":"doi:10.1109/iiswc.2017.8167777","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iiswc.2017.8167777","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Symposium on Workload Characterization (IISWC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101516694","display_name":"Leiming Yu","orcid":"https://orcid.org/0000-0003-4917-1030"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":true,"raw_author_name":"Leiming Yu","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014736530","display_name":"Xun Gong","orcid":"https://orcid.org/0000-0003-0417-8028"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Xun Gong","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041025969","display_name":"Yifan Sun","orcid":"https://orcid.org/0000-0003-3532-6521"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Yifan Sun","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071202587","display_name":"Qianqian Fang","orcid":"https://orcid.org/0000-0003-0805-935X"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Qianqian Fang","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034777515","display_name":"Norm Rubin","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Norm Rubin","raw_affiliation_strings":["NVIDIA Research"],"affiliations":[{"raw_affiliation_string":"NVIDIA Research","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061128237","display_name":"David Kaeli","orcid":"https://orcid.org/0000-0002-5692-0151"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"David Kaeli","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101516694"],"corresponding_institution_ids":["https://openalex.org/I87182695"],"apc_list":null,"apc_paid":null,"fwci":0.4506,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.63971901,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"9","issue":null,"first_page":"197","last_page":"206"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8578240871429443},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.7180678248405457},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6234617233276367},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5679450035095215},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.565061092376709},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4400976896286011},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.43028247356414795},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.41514450311660767},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.20374158024787903},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.101604163646698},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.10142114758491516}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8578240871429443},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.7180678248405457},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6234617233276367},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5679450035095215},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.565061092376709},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4400976896286011},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.43028247356414795},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.41514450311660767},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.20374158024787903},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.101604163646698},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.10142114758491516},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iiswc.2017.8167777","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iiswc.2017.8167777","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Symposium on Workload Characterization (IISWC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1559450590","https://openalex.org/W1968047430","https://openalex.org/W1969863734","https://openalex.org/W1979527452","https://openalex.org/W1984296775","https://openalex.org/W2000335122","https://openalex.org/W2006197381","https://openalex.org/W2080592089","https://openalex.org/W2098274770","https://openalex.org/W2098290747","https://openalex.org/W2103742924","https://openalex.org/W2125551452","https://openalex.org/W2126118553","https://openalex.org/W2130967222","https://openalex.org/W2135682468","https://openalex.org/W2140440478","https://openalex.org/W2149693551","https://openalex.org/W2151142801","https://openalex.org/W2152517358","https://openalex.org/W2154786353","https://openalex.org/W2155503253","https://openalex.org/W2163687928","https://openalex.org/W2166163757","https://openalex.org/W2167334577","https://openalex.org/W2187089797","https://openalex.org/W2282413880","https://openalex.org/W2480662436","https://openalex.org/W2593733978","https://openalex.org/W2614463520","https://openalex.org/W2997701623","https://openalex.org/W3152199537","https://openalex.org/W4230022086","https://openalex.org/W4235295270","https://openalex.org/W4236786453","https://openalex.org/W4244454515","https://openalex.org/W4250602606","https://openalex.org/W6641984271","https://openalex.org/W6695022590","https://openalex.org/W6737915666"],"related_works":["https://openalex.org/W2000785801","https://openalex.org/W986318368","https://openalex.org/W2384410913","https://openalex.org/W2352878646","https://openalex.org/W2004734601","https://openalex.org/W2130149817","https://openalex.org/W2990194547","https://openalex.org/W1480123525","https://openalex.org/W2620865396","https://openalex.org/W2414054180"],"abstract_inverted_index":{"GPUs":[0],"continue":[1],"to":[2,19,63,78,139,171,196,235,253,271,284,288],"increase":[3],"the":[4,21,26,36,53,57,70,84,88,98,118,142,163,199,237,264],"number":[5,80],"of":[6,23,81,91,109,120,147,165,202,239],"compute":[7],"resources":[8,37,55],"with":[9],"each":[10,92],"new":[11],"generation.":[12],"Many":[13,39],"data-parallel":[14],"applications":[15,40,149,213,241,270,291],"have":[16,61,113,215,279],"been":[17],"re-engineered":[18],"leverage":[20,168],"thousands":[22],"cores":[24],"on":[25,56,117,176,226],"GPU.":[27,230],"But":[28],"not":[29],"every":[30],"kernel":[31,85,93,99,134,173,204],"can":[32,112,219,261],"fully":[33],"utilize":[34,52],"all":[35],"available.":[38],"contain":[41],"multiple":[42,216],"kernels":[43,217],"that":[44,161,214,218,245],"could":[45],"potentially":[46],"be":[47],"run":[48,220],"concurrently.":[49],"To":[50],"better":[51],"massive":[54],"GPU,":[58],"device":[59],"vendors":[60],"started":[62],"support":[64],"Concurrent":[65],"Kernel":[66],"Execution":[67],"(CKE).":[68],"However,":[69],"application":[71,274,286],"throughput":[72],"provided":[73],"by":[74,247],"CKE":[75,224,240,266],"is":[76,194,233],"subject":[77],"a":[79,114,227,281],"factors,":[82],"including":[83],"configuration":[86],"attributes,":[87],"dynamic":[89,158],"behavior":[90],"(e.g.,":[94],"compute-intentive":[95],"vs.":[96],"memory-intensive),":[97],"launch":[100],"order":[101],"and":[102,145,157,167,184,222],"inter-kernel":[103],"dependencies.":[104],"Minor":[105],"changes":[106],"in":[107,190],"any":[108],"theses":[110],"factors":[111],"large":[115],"impact":[116,162],"effectiveness":[119],"CKE.":[121,152,293],"In":[122],"this":[123],"paper,":[124],"we":[125,260,278],"present":[126],"Moka,":[127],"an":[128],"empirical":[129],"model":[130,193,209,232],"for":[131,268],"tuning":[132],"concurrent":[133,203],"performance.":[135,256],"Moka":[136],"allows":[137],"us":[138],"accurately":[140,197],"predict":[141,198,236],"resulting":[143],"performance":[144,200,225,238],"scalability":[146],"multi-kernel":[148],"when":[150],"using":[151,210,292],"We":[153,206,276],"consider":[154],"both":[155],"static":[156],"workload":[159],"characteristics":[160],"utility":[164],"CKE,":[166],"these":[169],"metrics":[170],"drive":[172],"scheduling":[174],"decisions":[175],"NVIDIA":[177,228],"GPUs.":[178],"The":[179],"underlying":[180],"data":[181],"transfer":[182],"pattern":[183],"GPU":[185],"resource":[186],"contention":[187],"are":[188],"analyzed":[189],"detail.":[191],"Our":[192,231],"able":[195,234],"ceiling":[201],"execution.":[205],"validate":[207],"our":[208,258,269],"several":[211],"real-world":[212],"concurrently,":[221],"evaluate":[223],"Maxwell":[229],"accurately,":[242],"providing":[243],"estimates":[244],"differ":[246],"less":[248],"than":[249],"12%":[250],"as":[251],"compared":[252],"actual":[254],"runtime":[255],"Using":[257],"estimates,":[259],"quickly":[262],"find":[263],"best":[265],"strategy":[267],"achieve":[272],"improved":[273],"throughput.":[275],"believe":[277],"developed":[280],"useful":[282],"tool":[283],"aid":[285],"programmers":[287],"accelerate":[289],"their":[290]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
