{"id":"https://openalex.org/W7131413067","doi":"https://doi.org/10.48550/arxiv.2602.18931","title":"WANSpec: Leveraging Global Compute Capacity for LLM Inference","display_name":"WANSpec: Leveraging Global Compute Capacity for LLM Inference","publication_year":2026,"publication_date":"2026-02-21","ids":{"openalex":"https://openalex.org/W7131413067","doi":"https://doi.org/10.48550/arxiv.2602.18931"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.18931","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.18931","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.18931","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126701123","display_name":"Noah Martin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Martin, Noah","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126686698","display_name":"Fahad Dogar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dogar, Fahad","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5126701123"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.2354000061750412,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.2354000061750412,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.08150000125169754,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.06790000200271606,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7670999765396118},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6266000270843506},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.5393999814987183},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.5350000262260437},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5252000093460083},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.33500000834465027}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8209999799728394},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7670999765396118},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6266000270843506},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.5393999814987183},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.5350000262260437},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5252000093460083},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.37779998779296875},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.33500000834465027},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.33480000495910645},{"id":"https://openalex.org/C7545210","wikidata":"https://www.wikidata.org/wiki/Q838123","display_name":"Data redundancy","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.31209999322891235},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.26669999957084656},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2632000148296356},{"id":"https://openalex.org/C2983523559","wikidata":"https://www.wikidata.org/wiki/Q410657","display_name":"On demand","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C104759252","wikidata":"https://www.wikidata.org/wiki/Q187120","display_name":"Time shifting","level":3,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.18931","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.18931","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.18931","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.18931","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data":[0],"centers":[1,210],"capable":[2,39],"of":[3,51,69,86,121,201],"running":[4,20],"large":[5],"language":[6],"models":[7,24,34],"(LLMs)":[8],"are":[9,29,41],"spread":[10,112],"across":[11],"the":[12,21,47,54,67,89,96,125,168,172,198],"globe.":[13],"Some":[14],"have":[15,64],"high":[16,43,74,207],"end":[17],"GPUs":[18,40],"for":[19,32],"most":[22,38],"advanced":[23],"(100B+":[25],"parameters),":[26],"and":[27,180],"others":[28],"only":[30],"suitable":[31],"smaller":[33],"(1B":[35],"parameters).":[36],"The":[37],"under":[42],"demand":[44,208],"thanks":[45],"to":[46,57,72,82,88,124,147,161,171,190],"rapidly":[48],"expanding":[49],"applications":[50],"LLMs.":[52],"Choosing":[53],"right":[55],"location":[56],"run":[58],"an":[59],"LLM":[60,122],"inference":[61,87,100],"workload":[62],"can":[63,133,186],"consequences":[65],"on":[66],"latency":[68,194],"requests":[70],"due":[71],"these":[73],"demands.":[75],"In":[76,129],"this":[77],"work,":[78],"we":[79],"explore":[80],"options":[81],"shift":[83],"some":[84],"aspects":[85],"under-utilized":[90,126,173],"data":[91,127,209],"centers.":[92,128],"We":[93,114],"first":[94],"observe":[95],"varying":[97],"delays":[98],"affecting":[99],"in":[101,178,193,206],"AWS":[102],"services":[103],"from":[104],"different":[105],"regions,":[106],"demonstrating":[107],"that":[108,184],"load":[109],"is":[110,152],"not":[111],"evenly.":[113],"then":[115],"introduce":[116],"WANSpec,":[117],"which":[118],"offloads":[119],"part":[120],"generation":[123],"doing":[130],"so,":[131],"WANSpec":[132,185],"mitigate":[134],"capacity":[135],"issues":[136],"as":[137,139],"well":[138],"effectively":[140],"use":[141],"on-site":[142],"compute":[143,174],"(ie":[144],"at":[145],"universities)":[146],"augment":[148],"cloud":[149,181],"providers.":[150],"This":[151],"done":[153],"with":[154],"speculative":[155,202],"decoding,":[156,165],"a":[157],"widely":[158],"used":[159],"technique":[160],"speed":[162],"up":[163],"auto-regressive":[164],"by":[166,211],"moving":[167],"draft":[169,204],"model":[170,205],"resources.":[175],"Our":[176],"experiments":[177],"simulation":[179],"deployments":[182],"show":[183],"judiciously":[187],"employ":[188],"redundancy":[189],"avoid":[191],"increases":[192],"while":[195],"still":[196],"reducing":[197],"forward":[199],"passes":[200],"decoding's":[203],"over":[212],"50%.":[213]},"counts_by_year":[],"updated_date":"2026-02-26T06:34:08.959763","created_date":"2026-02-26T00:00:00"}
