{"id":"https://openalex.org/W7161029263","doi":"https://doi.org/10.48550/arxiv.2605.12001","title":"CR^2: Cost-Aware Risk-Controlled Routing for Wireless Device-Edge LLM Inference","display_name":"CR^2: Cost-Aware Risk-Controlled Routing for Wireless Device-Edge LLM Inference","publication_year":2026,"publication_date":"2026-05-12","ids":{"openalex":"https://openalex.org/W7161029263","doi":"https://doi.org/10.48550/arxiv.2605.12001"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.12001","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12001","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.12001","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136002620","display_name":"Nan Xue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xue, Nan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136025950","display_name":"Shengkang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Shengkang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136082631","display_name":"Zhiyong Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhiyong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136066079","display_name":"Jiangchao Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Jiangchao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136051436","display_name":"Yaping Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yaping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136014715","display_name":"Zixia Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Zixia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136051854","display_name":"Meixia Tao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Meixia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.29580000042915344,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.29580000042915344,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.10509999841451645,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.10189999639987946,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/router","display_name":"Router","score":0.650600016117096},{"id":"https://openalex.org/keywords/static-routing","display_name":"Static routing","score":0.5848000049591064},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.5199000239372253},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5058000087738037},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.46790000796318054},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.41909998655319214},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.39879998564720154},{"id":"https://openalex.org/keywords/link-state-routing-protocol","display_name":"Link-state routing protocol","score":0.39800000190734863},{"id":"https://openalex.org/keywords/policy-based-routing","display_name":"Policy-based routing","score":0.366100013256073}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7318999767303467},{"id":"https://openalex.org/C2775896111","wikidata":"https://www.wikidata.org/wiki/Q642560","display_name":"Router","level":2,"score":0.650600016117096},{"id":"https://openalex.org/C204948658","wikidata":"https://www.wikidata.org/wiki/Q1119410","display_name":"Static routing","level":4,"score":0.5848000049591064},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.5199000239372253},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5058000087738037},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.5048999786376953},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.46790000796318054},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.41909998655319214},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.39879998564720154},{"id":"https://openalex.org/C89305328","wikidata":"https://www.wikidata.org/wiki/Q1755411","display_name":"Link-state routing protocol","level":4,"score":0.39800000190734863},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.37770000100135803},{"id":"https://openalex.org/C196423136","wikidata":"https://www.wikidata.org/wiki/Q7209671","display_name":"Policy-based routing","level":5,"score":0.366100013256073},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.36090001463890076},{"id":"https://openalex.org/C50558702","wikidata":"https://www.wikidata.org/wiki/Q5535067","display_name":"Geographic routing","level":5,"score":0.35190001130104065},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.35190001130104065},{"id":"https://openalex.org/C9659607","wikidata":"https://www.wikidata.org/wiki/Q1268903","display_name":"Dynamic Source Routing","level":4,"score":0.3449999988079071},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3377000093460083},{"id":"https://openalex.org/C68649174","wikidata":"https://www.wikidata.org/wiki/Q1379116","display_name":"Base station","level":2,"score":0.3296999931335449},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.32120001316070557},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.30149999260902405},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.3012999892234802},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.29809999465942383},{"id":"https://openalex.org/C108037233","wikidata":"https://www.wikidata.org/wiki/Q11375","display_name":"Wireless network","level":3,"score":0.28540000319480896},{"id":"https://openalex.org/C104954878","wikidata":"https://www.wikidata.org/wiki/Q1648707","display_name":"Routing protocol","level":3,"score":0.28040000796318054},{"id":"https://openalex.org/C2776061582","wikidata":"https://www.wikidata.org/wiki/Q25325231","display_name":"Mobile edge computing","level":3,"score":0.2802000045776367},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.27149999141693115},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.2694000005722046},{"id":"https://openalex.org/C184896649","wikidata":"https://www.wikidata.org/wiki/Q290066","display_name":"Routing table","level":4,"score":0.2637999951839447},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2623000144958496},{"id":"https://openalex.org/C195780805","wikidata":"https://www.wikidata.org/wiki/Q1535986","display_name":"Metrics","level":5,"score":0.2621999979019165}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.12001","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12001","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.12001","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12001","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.9141137599945068,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"large":[1],"language":[2],"models":[3,31,35],"(LLMs)":[4],"move":[5],"from":[6,100],"centralized":[7,50],"clouds":[8],"to":[9,40,58,121,129,154,211],"mobile":[10,75],"edge":[11,34,68,76,132],"environments,":[12],"efficient":[13],"serving":[14],"must":[15],"balance":[16],"latency,":[17],"energy":[18,64],"consumption,":[19],"and":[20,32,53,63,85,116,204],"accuracy":[21],"under":[22,134,166],"constrained":[23],"device-edge":[24,90],"resources.":[25],"Query-level":[26],"routing":[27,78,91,174],"between":[28],"lightweight":[29,96],"on-device":[30,97],"stronger":[33],"provides":[36],"a":[37,80,88,95,117,142,181],"flexible":[38],"mechanism":[39],"navigate":[41],"this":[42,71],"trade-off.":[43],"However,":[44],"existing":[45],"routers":[46],"are":[47],"designed":[48],"for":[49,105],"cloud":[51],"settings":[52],"optimize":[54],"token-level":[55],"costs,":[56],"failing":[57],"capture":[59],"the":[60,130,135,162,167,173,199],"dynamic":[61],"latency":[62],"overheads":[65],"in":[66],"wireless":[67],"deployments.":[69],"In":[70],"paper,":[72],"we":[73],"formulate":[74],"LLM":[77],"as":[79],"deployment-constrained,":[81],"cost-aware":[82],"decision":[83],"problem,":[84],"propose":[86],"CR^2,":[87],"two-stage":[89],"framework.":[92],"CR^2":[93,178,196],"decouples":[94],"margin":[98,109],"gate":[99,110],"an":[101,155],"edge-side":[102],"utility":[103,169],"selector":[104],"deferred":[106],"queries.":[107],"The":[108],"operates":[111],"on":[112,172],"frozen":[113],"query":[114],"embeddings":[115],"user-specified":[118],"cost":[119,208],"weight":[120],"predict":[122],"whether":[123],"local":[124],"execution":[125],"is":[126],"utility-optimal":[127],"relative":[128],"best":[131],"alternative":[133],"target":[136],"operating":[137,152],"point.":[138],"We":[139],"further":[140],"introduce":[141],"conformal":[143],"risk":[144,165],"control":[145,160],"(CRC)":[146],"calibration":[147],"procedure":[148],"that":[149,177],"maps":[150],"each":[151],"point":[153],"acceptance":[156],"threshold,":[157],"enabling":[158],"explicit":[159],"of":[161],"marginal":[163],"false-acceptance":[164],"full-information":[168,182],"reference.":[170],"Experiments":[171],"task":[175],"show":[176],"closely":[179],"matches":[180],"reference":[183],"router":[184],"using":[185],"only":[186],"device-side":[187],"signals":[188],"before":[189],"deferral.":[190],"Compared":[191],"with":[192],"strong":[193],"query-level":[194],"baselines,":[195],"consistently":[197],"improves":[198],"deployable":[200],"accuracy-cost":[201],"Pareto":[202],"frontier":[203],"reduces":[205],"normalized":[206],"deployment":[207],"by":[209],"up":[210],"16.9%":[212],"at":[213],"matched":[214],"accuracy.":[215]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-14T00:00:00"}
