{"id":"https://openalex.org/W7128497811","doi":"https://doi.org/10.48550/arxiv.2602.08747","title":"PARD: Enhancing Goodput for Inference Pipeline via Proactive Request Dropping","display_name":"PARD: Enhancing Goodput for Inference Pipeline via Proactive Request Dropping","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7128497811","doi":"https://doi.org/10.48550/arxiv.2602.08747"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.08747","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125507893","display_name":"Zhixin Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhao, Zhixin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125550694","display_name":"Yitao Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Yitao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108149744","display_name":"Simin Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Simin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125514739","display_name":"Mingfang Ji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Mingfang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125485824","display_name":"Wei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125531606","display_name":"Yuhao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yuhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050753595","display_name":"Laiping Zhao","orcid":"https://orcid.org/0000-0003-1967-2192"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Laiping","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125591767","display_name":"Wenxin Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wenxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125584371","display_name":"Xiulong Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xiulong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114194341","display_name":"Wenyu Qu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qu, Wenyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125476647","display_name":"Hao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5125507893"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.22220000624656677,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.22220000624656677,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.14079999923706055,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.10790000110864639,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/goodput","display_name":"Goodput","score":0.965499997138977},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6122999787330627},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5515999794006348},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.489300012588501},{"id":"https://openalex.org/keywords/bigram","display_name":"Bigram","score":0.4171999990940094},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.40310001373291016},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.3939000070095062}],"concepts":[{"id":"https://openalex.org/C94022561","wikidata":"https://www.wikidata.org/wiki/Q1172393","display_name":"Goodput","level":4,"score":0.965499997138977},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8299000263214111},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6122999787330627},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5515999794006348},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.489300012588501},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.4505999982357025},{"id":"https://openalex.org/C108757681","wikidata":"https://www.wikidata.org/wiki/Q2773912","display_name":"Bigram","level":3,"score":0.4171999990940094},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.40310001373291016},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.3939000070095062},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.36800000071525574},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3617999851703644},{"id":"https://openalex.org/C55282118","wikidata":"https://www.wikidata.org/wiki/Q252683","display_name":"Snapshot (computer storage)","level":2,"score":0.29420000314712524},{"id":"https://openalex.org/C2781345722","wikidata":"https://www.wikidata.org/wiki/Q5308388","display_name":"Drop (telecommunication)","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.26989999413490295},{"id":"https://openalex.org/C129916263","wikidata":"https://www.wikidata.org/wiki/Q1141183","display_name":"Backward chaining","level":4,"score":0.2696000039577484},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.25780001282691956}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.08747","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.08747","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08747","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.08747","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Modern":[0],"deep":[1],"neural":[2],"network":[3],"(DNN)":[4],"applications":[5],"integrate":[6],"multiple":[7],"DNN":[8],"models":[9],"into":[10],"inference":[11,30,115,138,167],"pipelines":[12,31],"with":[13,144],"stringent":[14],"latency":[15,44,185],"requirements":[16],"for":[17,29],"customized":[18],"tasks.":[19],"To":[20,132],"mitigate":[21],"extensive":[22],"request":[23,52,172],"timeouts":[24],"caused":[25],"by":[26,150,222],"accumulation,":[27],"systems":[28,58],"commonly":[32,49],"drop":[33,60,119,160,181,216],"a":[34,152,192],"subset":[35],"of":[36,92,99,109,165,194,210],"requests":[37,61,93,101,121,161,179],"so":[38],"the":[39,89,106,114,126,129,166,208,211,215],"remaining":[40,184],"ones":[41],"can":[42,75],"satisfy":[43],"constraints.":[45],"Since":[46],"it":[47,81],"is":[48],"believed":[50],"that":[51,113,156,175,201],"dropping":[53,85,100,105,148,154],"adversely":[54],"affects":[55],"goodput,":[56,79],"existing":[57],"only":[59],"when":[62,158],"they":[63],"have":[64],"to,":[65],"which":[66,177],"we":[67,135],"call":[68],"reactive":[69,73],"dropping.":[70],"However,":[71],"this":[72],"policy":[74],"not":[76],"maintain":[77],"high":[78],"as":[80],"neither":[82],"makes":[83],"timely":[84,145],"decisions":[86,149],"nor":[87],"identifies":[88],"proper":[90],"set":[91,108],"to":[94,97,124,159,180],"drop,":[95],"leading":[96],"issues":[98],"too":[102],"late":[103],"or":[104],"wrong":[107],"requests.":[110],"We":[111],"propose":[112],"system":[116,139],"should":[117],"proactively":[118],"certain":[120],"in":[122],"advance":[123],"enhance":[125],"goodput":[127,143,206],"across":[128],"entire":[130],"workload.":[131],"achieve":[133],"this,":[134],"design":[136],"an":[137,170],"PARD.":[140],"It":[141],"enhances":[142],"and":[146,169,187,218,224],"precise":[147],"integrating":[151],"proactive":[153],"method":[155],"decides":[157],"using":[162],"runtime":[163],"information":[164],"pipeline,":[168],"adaptive":[171],"priority":[173],"mechanism":[174],"selects":[176],"specific":[178],"based":[182],"on":[183,191],"budgets":[186],"workload":[188],"intensity.":[189],"Evaluation":[190],"cluster":[193],"64":[195],"GPUs":[196],"over":[197],"real-world":[198],"workloads":[199],"shows":[200],"PARD":[202],"achieves":[203],"$16\\%$-$176\\%$":[204],"higher":[205],"than":[207],"state":[209],"art":[212],"while":[213],"reducing":[214],"rate":[217],"wasted":[219],"computation":[220],"resources":[221],"$1.6\\times$-$17\\times$":[223],"$1.5\\times$-$62\\times$":[225],"respectively.":[226]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-11T00:00:00"}
