{"id":"https://openalex.org/W4416298100","doi":"https://doi.org/10.1145/3777373","title":"Coinf: QoS-aware DRL-based Inference Task Scheduling Framework with Batching Processing","display_name":"Coinf: QoS-aware DRL-based Inference Task Scheduling Framework with Batching Processing","publication_year":2025,"publication_date":"2025-11-17","ids":{"openalex":"https://openalex.org/W4416298100","doi":"https://doi.org/10.1145/3777373"},"language":"en","primary_location":{"id":"doi:10.1145/3777373","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3777373","pdf_url":null,"source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014686940","display_name":"Guanglin Zhang","orcid":"https://orcid.org/0000-0003-4095-6843"},"institutions":[{"id":"https://openalex.org/I181326427","display_name":"Donghua University","ror":"https://ror.org/035psfh38","country_code":"CN","type":"education","lineage":["https://openalex.org/I181326427"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guanglin Zhang","raw_affiliation_strings":["Donghua University","Donghua University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Donghua University","institution_ids":["https://openalex.org/I181326427"]},{"raw_affiliation_string":"Donghua University, Shanghai, China","institution_ids":["https://openalex.org/I181326427"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yuhao Zhang","orcid":"https://orcid.org/0009-0006-0552-1130"},"institutions":[{"id":"https://openalex.org/I181326427","display_name":"Donghua University","ror":"https://ror.org/035psfh38","country_code":"CN","type":"education","lineage":["https://openalex.org/I181326427"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhao Zhang","raw_affiliation_strings":["Donghua University","Donghua University, Shanghai China"],"affiliations":[{"raw_affiliation_string":"Donghua University","institution_ids":["https://openalex.org/I181326427"]},{"raw_affiliation_string":"Donghua University, Shanghai China","institution_ids":["https://openalex.org/I181326427"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068433742","display_name":"Xiaowen Huang","orcid":"https://orcid.org/0000-0001-6323-7070"},"institutions":[{"id":"https://openalex.org/I181326427","display_name":"Donghua University","ror":"https://ror.org/035psfh38","country_code":"CN","type":"education","lineage":["https://openalex.org/I181326427"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaowen Huang","raw_affiliation_strings":["Donghua University","Donghua University, Shanghai China"],"affiliations":[{"raw_affiliation_string":"Donghua University","institution_ids":["https://openalex.org/I181326427"]},{"raw_affiliation_string":"Donghua University, Shanghai China","institution_ids":["https://openalex.org/I181326427"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100749050","display_name":"Wenqian Zhang","orcid":"https://orcid.org/0000-0003-2007-6478"},"institutions":[{"id":"https://openalex.org/I181326427","display_name":"Donghua University","ror":"https://ror.org/035psfh38","country_code":"CN","type":"education","lineage":["https://openalex.org/I181326427"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenqian Zhang","raw_affiliation_strings":["Donghua University","Donghua University, Shanghai China"],"affiliations":[{"raw_affiliation_string":"Donghua University","institution_ids":["https://openalex.org/I181326427"]},{"raw_affiliation_string":"Donghua University, Shanghai China","institution_ids":["https://openalex.org/I181326427"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5014686940"],"corresponding_institution_ids":["https://openalex.org/I181326427"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39162654,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"25","issue":"1","first_page":"1","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.8292999863624573,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.8292999863624573,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.037700001150369644,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.02280000038444996,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7459999918937683},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6359000205993652},{"id":"https://openalex.org/keywords/quality-of-service","display_name":"Quality of service","score":0.4916999936103821},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.47620001435279846},{"id":"https://openalex.org/keywords/provisioning","display_name":"Provisioning","score":0.4681999981403351},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.45649999380111694},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.37860000133514404},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.3725999891757965},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.37049999833106995}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8087999820709229},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7459999918937683},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6359000205993652},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5163000226020813},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.4916999936103821},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.49079999327659607},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.47620001435279846},{"id":"https://openalex.org/C172191483","wikidata":"https://www.wikidata.org/wiki/Q1071806","display_name":"Provisioning","level":2,"score":0.4681999981403351},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.45649999380111694},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43479999899864197},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.37860000133514404},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.3725999891757965},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.37049999833106995},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.35499998927116394},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.3319999873638153},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.32499998807907104},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.3208000063896179},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.3050999939441681},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.304500013589859},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.29339998960494995},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.26330000162124634},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C107568181","wikidata":"https://www.wikidata.org/wiki/Q5319000","display_name":"Dynamic priority scheduling","level":3,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3777373","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3777373","pdf_url":null,"source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2062492869","display_name":null,"funder_award_id":"23XD1420100","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"},{"id":"https://openalex.org/G2885262493","display_name":null,"funder_award_id":"62301307","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G575906024","display_name":null,"funder_award_id":"2232025D-47","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321885","display_name":"Science and Technology Commission of Shanghai Municipality","ror":"https://ror.org/03kt66j61"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null},{"id":"https://openalex.org/F4320336023","display_name":"Program for Professor of Special Appointment (Eastern Scholar) at Shanghai Institutions of Higher Learning","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2417429787","https://openalex.org/W2604319603","https://openalex.org/W2963163009","https://openalex.org/W2964667342","https://openalex.org/W2982157693","https://openalex.org/W3016712945","https://openalex.org/W3097411828","https://openalex.org/W3133751570","https://openalex.org/W3181576318","https://openalex.org/W4281554877","https://openalex.org/W4285261368","https://openalex.org/W4289821779","https://openalex.org/W4293025059","https://openalex.org/W4312191382","https://openalex.org/W4312854700","https://openalex.org/W4313229743","https://openalex.org/W4360831782","https://openalex.org/W4385151994","https://openalex.org/W4387643251","https://openalex.org/W4389098960","https://openalex.org/W4394923310"],"related_works":[],"abstract_inverted_index":{"The":[0],"emergence":[1],"of":[2,29,32,37,87,90,109,157,176],"deploying":[3],"Deep":[4],"neural":[5],"network":[6],"(DNN)":[7],"services":[8],"on":[9,41,61],"edge":[10,62],"servers":[11],"has":[12,65],"spurred":[13],"research":[14],"into":[15,146],"efficiently":[16],"provisioning":[17],"inference":[18,59,125,181],"services.":[19],"However,":[20],"previous":[21],"studies":[22],"have":[23],"neglected":[24],"to":[25,70,104,141,170],"consider":[26],"the":[27,66,95,106,115,155,158,172],"implications":[28],"different":[30],"types":[31],"DNN":[33,58,92,144,160],"and":[34,79,129,174],"varying":[35],"quality":[36],"service":[38],"(QoS)":[39],"requirements":[40],"QoS":[42,130],"violation":[43,131],"rates.":[44],"In":[45],"this":[46],"article,":[47],"we":[48],"propose":[49],"a":[50,101,121],"novel":[51],"framework,":[52],"named":[53],"Coinf,":[54],"for":[55],"scheduling":[56],"heterogeneous":[57,180],"tasks":[60,93,145],"servers.":[63],"Coinf":[64,119,153,177],"following":[67],"four":[68],"advantages":[69],"effectively":[71],"handle":[72],"attribute":[73],"analysis,":[74],"performance":[75],"balancing,":[76],"parallel":[77,150],"execution,":[78],"model":[80,103],"accuracy:":[81],"(1)":[82],"It":[83,134],"enables":[84],"efficient":[85],"profiling":[86],"domain-specific":[88],"attributes":[89],"various":[91],"during":[94],"offline":[96],"stage,":[97],"achieved":[98],"by":[99,162],"constructing":[100],"regression":[102],"predict":[105],"end-to-end":[107],"latency":[108],"each":[110],"task.":[111],"(2)":[112],"By":[113],"utilizing":[114],"predicted":[116],"execution":[117],"time,":[118],"achieves":[120],"commendable":[122],"balance":[123],"among":[124],"latency,":[126],"system":[127],"throughput,":[128],"rate.":[132],"(3)":[133],"employs":[135],"emerging":[136],"deep":[137],"reinforcement":[138],"learning":[139],"(DRL)":[140],"aggregate":[142],"individual":[143],"batches,":[147],"enabling":[148],"concurrent":[149],"execution.":[151],"(4)":[152],"preserves":[154],"accuracies":[156],"provided":[159],"models":[161],"not":[163],"modifying":[164],"them.":[165],"Numerical":[166],"experiments":[167],"are":[168],"constructed":[169],"validate":[171],"reliability":[173],"efficiency":[175],"in":[178],"handling":[179],"tasks.":[182]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-17T00:00:00"}
