{"id":"https://openalex.org/W4404400640","doi":"https://doi.org/10.1145/3694715.3695978","title":"Improving DNN Inference Throughput Using Practical, Per-Input Compute Adaptation","display_name":"Improving DNN Inference Throughput Using Practical, Per-Input Compute Adaptation","publication_year":2024,"publication_date":"2024-11-04","ids":{"openalex":"https://openalex.org/W4404400640","doi":"https://doi.org/10.1145/3694715.3695978"},"language":"en","primary_location":{"id":"doi:10.1145/3694715.3695978","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3694715.3695978","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3694715.3695978","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 30th Symposium on Operating Systems Principles","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3694715.3695978","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090733623","display_name":"Anand Iyer","orcid":"https://orcid.org/0009-0009-3700-2994"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Anand Padmanabha Iyer","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, US"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, US","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111538674","display_name":"Mingyu Guan","orcid":"https://orcid.org/0000-0001-6294-7978"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mingyu Guan","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, US"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, US","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103107119","display_name":"Yinwei Dai","orcid":"https://orcid.org/0000-0002-9291-2060"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yinwei Dai","raw_affiliation_strings":["Princeton University, New Jersey, US"],"affiliations":[{"raw_affiliation_string":"Princeton University, New Jersey, US","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100638027","display_name":"Rui Pan","orcid":"https://orcid.org/0000-0002-6973-3259"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rui Pan","raw_affiliation_strings":["Princeton University, New Jersey, US"],"affiliations":[{"raw_affiliation_string":"Princeton University, New Jersey, US","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043630016","display_name":"Swapnil Gandhi","orcid":"https://orcid.org/0000-0003-3689-9591"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Swapnil Gandhi","raw_affiliation_strings":["Stanford University, Stanford, USA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053593890","display_name":"Ravi Netravali","orcid":"https://orcid.org/0000-0001-7002-5033"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ravi Netravali","raw_affiliation_strings":["Princeton University, New Jersey, US"],"affiliations":[{"raw_affiliation_string":"Princeton University, New Jersey, US","institution_ids":["https://openalex.org/I20089843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5090733623"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":0.2914,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.61115518,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"624","last_page":"639"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.8144094944000244},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7750476598739624},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7199625372886658},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.6050089597702026},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3953407108783722},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.07182908058166504}],"concepts":[{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.8144094944000244},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7750476598739624},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7199625372886658},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.6050089597702026},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3953407108783722},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.07182908058166504},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3694715.3695978","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3694715.3695978","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3694715.3695978","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 30th Symposium on Operating Systems Principles","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3694715.3695978","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3694715.3695978","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3694715.3695978","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 30th Symposium on Operating Systems Principles","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4404400640.pdf"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W2099001231","https://openalex.org/W2108598243","https://openalex.org/W2251994258","https://openalex.org/W2766839578","https://openalex.org/W2794670651","https://openalex.org/W2953212265","https://openalex.org/W2969388332","https://openalex.org/W2982157693","https://openalex.org/W2989743967","https://openalex.org/W2997006708","https://openalex.org/W2998183051","https://openalex.org/W3034292689","https://openalex.org/W3034368386","https://openalex.org/W3035030897","https://openalex.org/W3035038672","https://openalex.org/W3038012435","https://openalex.org/W3100560913","https://openalex.org/W3138154797","https://openalex.org/W3154971029","https://openalex.org/W3160106041","https://openalex.org/W3170113752","https://openalex.org/W3176017841","https://openalex.org/W3204647170","https://openalex.org/W4212774754","https://openalex.org/W4225004481","https://openalex.org/W4285595056","https://openalex.org/W4290877962","https://openalex.org/W4387321091","https://openalex.org/W4388874804","https://openalex.org/W4389523983","https://openalex.org/W6840061620"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Machine":[0],"learning":[1],"inference":[2,107],"platforms":[3],"continue":[4],"to":[5,21,59,77,113,155,178],"face":[6,71],"high":[7],"request":[8],"rates":[9],"and":[10,44,115,140,146,182],"strict":[11],"latency":[12],"constraints.":[13],"Existing":[14],"solutions":[15],"largely":[16,75],"focus":[17],"on":[18,47,84],"compressing":[19],"models":[20,103,121,148,184],"substantially":[22],"lower":[23],"compute":[24],"costs":[25,46],"(and":[26,87],"time)":[27],"with":[28,144],"mild":[29],"accuracy":[30,43],"degradations.":[31],"This":[32],"paper":[33],"explores":[34],"an":[35,64],"alternate":[36],"(but":[37],"complementary)":[38],"technique":[39],"that":[40,80,99,125,150],"trades":[41],"off":[42],"resource":[45,88,138],"a":[48,61,123,127,161,170],"perinput":[49],"granularity:":[50],"early":[51,69,101],"exit":[52,60,102],"models,":[53],"which":[54],"selectively":[55],"allow":[56],"certain":[57],"inputs":[58,82],"model":[62,91],"from":[63],"intermediate":[65],"layer.":[66],"Though":[67],"intuitive,":[68],"exits":[70],"fundamental":[72],"deployment":[73],"challenges,":[74],"owing":[76],"the":[78,96,134],"effects":[79],"exiting":[81],"have":[83],"batch":[85,129],"size":[86,130],"utilization)":[89],"throughout":[90,131],"execution.":[92],"We":[93],"present":[94],"E3,":[95],"first":[97],"system":[98],"makes":[100],"practical":[104],"for":[105,137],"realistic":[106],"deployments.":[108],"Our":[109],"key":[110],"insight":[111],"is":[112],"split":[114],"replicate":[116],"blocks":[117],"of":[118],"layers":[119],"in":[120,122,158,167],"manner":[124],"maintains":[126],"constant":[128],"execution,":[132],"all":[133],"while":[135],"accounting":[136],"requirements":[139],"communication":[141],"overheads.":[142],"Evaluations":[143],"NLP":[145],"vision":[147],"show":[149],"E3":[151],"can":[152],"deliver":[153],"up":[154],"1.74\u00d7":[156],"improvement":[157],"goodput":[159,175],"(for":[160,169],"fixed":[162,171],"cost)":[163],"or":[164],"1.78\u00d7":[165],"reduction":[166],"cost":[168],"goodput).":[172],"Additionally,":[173],"E3's":[174],"wins":[176],"generalize":[177],"autoregressive":[179],"LLMs":[180],"(2.8--3.8\u00d7)":[181],"compressed":[183],"(1.67\u00d7).":[185]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
