{"id":"https://openalex.org/W4402896823","doi":"https://doi.org/10.1109/iwqos61813.2024.10682949","title":"SyncIntellects: Orchestrating LLM Inference with Progressive Prediction and QoS-Friendly Control","display_name":"SyncIntellects: Orchestrating LLM Inference with Progressive Prediction and QoS-Friendly Control","publication_year":2024,"publication_date":"2024-06-19","ids":{"openalex":"https://openalex.org/W4402896823","doi":"https://doi.org/10.1109/iwqos61813.2024.10682949"},"language":"en","primary_location":{"id":"doi:10.1109/iwqos61813.2024.10682949","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwqos61813.2024.10682949","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/ACM 32nd International Symposium on Quality of Service (IWQoS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043582832","display_name":"Xue Lin","orcid":"https://orcid.org/0000-0001-6210-8883"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xue Lin","raw_affiliation_strings":["Nankai University,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Nankai University,Tianjin,China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017237474","display_name":"Zhibo Zhang","orcid":"https://orcid.org/0009-0006-0524-5236"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhibo Zhang","raw_affiliation_strings":["Nankai University,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Nankai University,Tianjin,China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113404327","display_name":"Peining Yue","orcid":null},"institutions":[{"id":"https://openalex.org/I4210097143","display_name":"Tianjin Research Institute of Electric Science (China)","ror":"https://ror.org/010ryc044","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210097143"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peining Yue","raw_affiliation_strings":["State Grid Tianjin Electronic Power Company,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"State Grid Tianjin Electronic Power Company,Tianjin,China","institution_ids":["https://openalex.org/I4210097143"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327735","display_name":"Haoran Li","orcid":"https://orcid.org/0000-0002-8518-5491"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoran Li","raw_affiliation_strings":["Nankai University,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Nankai University,Tianjin,China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100406022","display_name":"Jin Zhang","orcid":"https://orcid.org/0000-0003-4871-6318"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Zhang","raw_affiliation_strings":["Nankai University,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Nankai University,Tianjin,China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102813072","display_name":"Baoyu Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baoyu Fan","raw_affiliation_strings":["Nankai University,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Nankai University,Tianjin,China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070555846","display_name":"Huayou Su","orcid":"https://orcid.org/0000-0002-3587-0917"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huayou Su","raw_affiliation_strings":["National University of Defense Technology,Hunan,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Hunan,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045499166","display_name":"Xiaoli Gong","orcid":"https://orcid.org/0000-0002-9836-558X"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoli Gong","raw_affiliation_strings":["Nankai University,Tianjin,China"],"affiliations":[{"raw_affiliation_string":"Nankai University,Tianjin,China","institution_ids":["https://openalex.org/I205237279"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5043582832"],"corresponding_institution_ids":["https://openalex.org/I205237279"],"apc_list":null,"apc_paid":null,"fwci":0.3637,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.67211354,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7516999840736389,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7516999840736389,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.7513999938964844,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14351","display_name":"Statistical and Computational Modeling","score":0.7251999974250793,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6964589357376099},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6808496117591858},{"id":"https://openalex.org/keywords/quality-of-service","display_name":"Quality of service","score":0.4930398166179657},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4354832172393799},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3654964864253998},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.21031340956687927}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6964589357376099},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6808496117591858},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.4930398166179657},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4354832172393799},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3654964864253998},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.21031340956687927}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iwqos61813.2024.10682949","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwqos61813.2024.10682949","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/ACM 32nd International Symposium on Quality of Service (IWQoS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1966523365","https://openalex.org/W2131641675","https://openalex.org/W2606964149","https://openalex.org/W2889787757","https://openalex.org/W2912924812","https://openalex.org/W2933138175","https://openalex.org/W2963323070","https://openalex.org/W2963339397","https://openalex.org/W2964110616","https://openalex.org/W3081168214","https://openalex.org/W3083199591","https://openalex.org/W3090350559","https://openalex.org/W3114271877","https://openalex.org/W3130716829","https://openalex.org/W3155584966","https://openalex.org/W3157677060","https://openalex.org/W3175546025","https://openalex.org/W3185341429","https://openalex.org/W3194676777","https://openalex.org/W3203701986","https://openalex.org/W4280622851","https://openalex.org/W4324054705","https://openalex.org/W4384302749","https://openalex.org/W4387321091","https://openalex.org/W4388979610","https://openalex.org/W4389521054","https://openalex.org/W6604455612","https://openalex.org/W6737479944","https://openalex.org/W6747759466","https://openalex.org/W6761205521","https://openalex.org/W6766978945","https://openalex.org/W6767997687","https://openalex.org/W6769627184","https://openalex.org/W6770820644","https://openalex.org/W6782879696","https://openalex.org/W6787837539","https://openalex.org/W6796196852","https://openalex.org/W6809646742","https://openalex.org/W6850202480","https://openalex.org/W6850927664","https://openalex.org/W6850988808","https://openalex.org/W6853325866","https://openalex.org/W6853336479","https://openalex.org/W6854866820","https://openalex.org/W6857972656","https://openalex.org/W6861130909"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Large":[0,107],"Language":[1,108],"Models":[2],"(LLMs)":[3],"have":[4,168],"shown":[5],"impressive":[6],"capabilities,":[7],"especially":[8],"in":[9,89,201,211],"the":[10,65,86,162,165,181,195],"realm":[11],"of":[12,48,119,157,164],"Human-Machine":[13],"Chat":[14],"Systems.":[15],"Nevertheless,":[16],"these":[17],"models":[18],"entail":[19],"significant":[20,59,199],"computational":[21],"expenses,":[22],"particularly":[23],"when":[24],"generating":[25],"tokens.":[26],"As":[27],"a":[28,46,82,102,138,142,176,198],"remedy":[29],"to":[30,67,80,93,105,154],"enhance":[31],"system":[32],"throughput":[33,212],"and":[34,52,85,117,171,194],"hardware":[35],"utilization,":[36],"batch":[37,47],"scheduling":[38],"is":[39,64,152],"commonly":[40],"adopted.":[41],"This":[42],"method":[43],"involves":[44],"initiating":[45],"inference":[49],"requests":[50,69],"concurrently":[51],"then":[53],"waiting":[54],"for":[55],"their":[56],"completion.":[57],"A":[58],"challenge":[60],"encountered":[61],"with":[62,70,112,175,208],"task-batching":[63],"need":[66],"group":[68],"similar":[71],"response":[72,77,90,114,127,144,158],"lengths.":[73],"However,":[74],"accurately":[75],"predicting":[76],"length":[78,91,115,122,128,145],"proves":[79],"be":[81],"daunting":[83],"task,":[84],"inherent":[87],"variability":[88],"leads":[92],"suboptimal":[94],"resource":[95],"utilization.In":[96],"this":[97],"paper,":[98],"we":[99],"introduce":[100],"SyncIntellects,":[101],"framework":[103],"designed":[104],"orchestrate":[106],"Model":[109],"(LLM)":[110],"Inference":[111],"fine-grained":[113],"prediction":[116,129],"Quality":[118],"Service":[120],"(QoS)-Friendly":[121],"control.":[123],"Specifically,":[124],"SyncIntellects":[125,170],"enhances":[126],"by":[130,203,213],"leveraging":[131],"embedding":[132],"information":[133],"during":[134],"token":[135],"generation":[136],"through":[137],"transformer-based":[139],"model.":[140,184],"Subsequently,":[141],"dynamic":[143],"controller":[146],"based":[147,179],"on":[148,180,189,205],"Prompt":[149],"Engineering":[150],"techniques":[151],"employed":[153],"ensure":[155],"alignment":[156],"lengths":[159],"without":[160],"compromising":[161],"QoS":[163],"responses.":[166],"We":[167,185],"implemented":[169],"seamlessly":[172],"integrated":[173],"it":[174],"chatbot":[177],"engine":[178],"llama2":[182],"7B":[183],"conduct":[186],"comprehensive":[187],"experiments":[188],"an":[190,209],"NVIDIA":[191],"A100-based":[192],"testbed,":[193],"results":[196],"demonstrate":[197],"reduction":[200],"latency":[202],"17.76%":[204],"average,":[206],"along":[207],"increase":[210],"9.34%.":[214]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
