{"id":"https://openalex.org/W4385623261","doi":"https://doi.org/10.1145/3588195.3595940","title":"COLTI: Towards Concurrent and Co-located DNN Training and Inference","display_name":"COLTI: Towards Concurrent and Co-located DNN Training and Inference","publication_year":2023,"publication_date":"2023-08-07","ids":{"openalex":"https://openalex.org/W4385623261","doi":"https://doi.org/10.1145/3588195.3595940"},"language":"en","primary_location":{"id":"doi:10.1145/3588195.3595940","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3588195.3595940","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd International Symposium on High-Performance Parallel and Distributed Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009949851","display_name":"Jaiaid Mobin","orcid":"https://orcid.org/0000-0002-4862-0036"},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jaiaid Mobin","raw_affiliation_strings":["Rochester Institute of Technology, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"Rochester Institute of Technology, Rochester, NY, USA","institution_ids":["https://openalex.org/I155173764"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049218913","display_name":"Avinash Maurya","orcid":"https://orcid.org/0000-0002-8200-0148"},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Avinash Maurya","raw_affiliation_strings":["Rochester Institute of Technology, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"Rochester Institute of Technology, Rochester, NY, USA","institution_ids":["https://openalex.org/I155173764"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054011637","display_name":"M. Mustafa Rafique","orcid":"https://orcid.org/0000-0002-5034-2880"},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"M. Mustafa Rafique","raw_affiliation_strings":["Rochester Institute of Technology, Rochester, NY, USA"],"affiliations":[{"raw_affiliation_string":"Rochester Institute of Technology, Rochester, NY, USA","institution_ids":["https://openalex.org/I155173764"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5009949851"],"corresponding_institution_ids":["https://openalex.org/I155173764"],"apc_list":null,"apc_paid":null,"fwci":0.4739,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.64394018,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"309","last_page":"310"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8647524118423462},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7303508520126343},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6396303176879883},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.5114740133285522},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.502818763256073},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.49272873997688293},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.48381370306015015},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.47181570529937744},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4582291543483734},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.41290926933288574},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.39816349744796753},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.37759605050086975},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3482404947280884},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1625358760356903}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8647524118423462},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7303508520126343},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6396303176879883},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.5114740133285522},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.502818763256073},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.49272873997688293},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.48381370306015015},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.47181570529937744},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4582291543483734},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.41290926933288574},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.39816349744796753},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.37759605050086975},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3482404947280884},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1625358760356903},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3588195.3595940","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3588195.3595940","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd International Symposium on High-Performance Parallel and Distributed Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.4399999976158142}],"awards":[{"id":"https://openalex.org/G2131453672","display_name":null,"funder_award_id":"2106635","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3862349333","display_name":null,"funder_award_id":"2106634,2106635","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8515454090","display_name":null,"funder_award_id":"2106634","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":1,"referenced_works":["https://openalex.org/W3097411828"],"related_works":["https://openalex.org/W2000785801","https://openalex.org/W986318368","https://openalex.org/W2384410913","https://openalex.org/W2352878646","https://openalex.org/W2004734601","https://openalex.org/W2130149817","https://openalex.org/W2990194547","https://openalex.org/W1480123525","https://openalex.org/W2620865396","https://openalex.org/W2414054180"],"abstract_inverted_index":{"Deep":[0],"learning":[1],"models":[2,153],"are":[3,105],"extensively":[4],"used":[5],"in":[6,155,166],"a":[7,44,126],"wide":[8],"range":[9],"of":[10,47,128,149],"domains,":[11],"e.g.,":[12],"scientific":[13],"simulations,":[14],"predictions,":[15],"and":[16,26,29,102,116,140,163,168],"modeling.":[17],"However,":[18,98],"training":[19,139],"these":[20],"dense":[21],"networks":[22],"is":[23],"both":[24],"compute":[25,59,66],"memory":[27,52,92,169],"intensive,":[28],"typically":[30,55],"requires":[31],"accelerators":[32],"such":[33,40,62],"as":[34],"Graphics":[35],"Processing":[36],"Units":[37],"(GPUs).":[38],"While":[39],"DNN":[41,103,138,152],"workloads":[42],"consume":[43],"major":[45],"proportion":[46],"the":[48,57,64,69,85,133,156],"limited":[49],"onboard":[50],"high-bandwidth":[51],"(HBM),":[53],"they":[54],"underutilize":[56],"GPU":[58,70,144],"resources.":[60],"In":[61,118],"scenarios,":[63],"idle":[65],"resources":[67,93],"on":[68,84,142],"can":[71,79,90],"be":[72,81],"leveraged":[73],"to":[74,108,112,131,161],"run":[75],"pending":[76],"jobs":[77],"that":[78],"either":[80],"(1)":[82],"accommodated":[83],"remainder":[86],"HBM,":[87],"or":[88],"(2)":[89],"share":[91],"with":[94],"other":[95],"concurrent":[96],"workloads.":[97],"state-of-the-art":[99],"workload":[100],"schedulers":[101],"runtimes":[104],"not":[106],"designed":[107],"leverage":[109],"HBM":[110],"co-location":[111],"improve":[113],"resource":[114],"utilization":[115],"throughput.":[117],"this":[119],"work,":[120],"we":[121],"propose":[122],"COLTI,":[123],"which":[124],"introduces":[125],"set":[127],"novel":[129],"techniques":[130],"solve":[132],"aforementioned":[134],"challenges":[135],"by":[136],"co-locating":[137],"inference":[141],"memory-constrained":[143],"devices.":[145],"Our":[146],"preliminary":[147],"evaluations":[148],"three":[150],"different":[151],"implemented":[154],"PyTorch":[157],"framework":[158],"demonstrate":[159],"up":[160],"37%":[162],"40%":[164],"improvement":[165],"makespan":[167],"utilization,":[170],"respectively.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
