{"id":"https://openalex.org/W4401878036","doi":"https://doi.org/10.1145/3677387","title":"A Hybrid Future for AI","display_name":"A Hybrid Future for AI","publication_year":2024,"publication_date":"2024-08-26","ids":{"openalex":"https://openalex.org/W4401878036","doi":"https://doi.org/10.1145/3677387"},"language":"en","primary_location":{"id":"doi:10.1145/3677387","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3677387","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3677387","source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3677387","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5105889741","display_name":"Chris Edwards","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chris Edwards","raw_affiliation_strings":["Surrey, Surrey, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Surrey, Surrey, United Kingdom","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5105889741"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14492942,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"67","issue":"10","first_page":"15","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11344","display_name":"Traffic Prediction and Management Techniques","score":0.9369999766349792,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11344","display_name":"Traffic Prediction and Management Techniques","score":0.9369999766349792,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intelligent-transportation-system","display_name":"Intelligent transportation system","score":0.6697608232498169},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5589994788169861},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.425419420003891},{"id":"https://openalex.org/keywords/transport-engineering","display_name":"Transport engineering","score":0.31848806142807007},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.2761375606060028},{"id":"https://openalex.org/keywords/aerospace-engineering","display_name":"Aerospace engineering","score":0.07460308074951172}],"concepts":[{"id":"https://openalex.org/C47796450","wikidata":"https://www.wikidata.org/wiki/Q508378","display_name":"Intelligent transportation system","level":2,"score":0.6697608232498169},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5589994788169861},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.425419420003891},{"id":"https://openalex.org/C22212356","wikidata":"https://www.wikidata.org/wiki/Q775325","display_name":"Transport engineering","level":1,"score":0.31848806142807007},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2761375606060028},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.07460308074951172}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3677387","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3677387","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3677387","source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3677387","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3677387","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3677387","source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.6899999976158142}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4401878036.pdf","grobid_xml":"https://content.openalex.org/works/W4401878036.grobid-xml"},"referenced_works_count":4,"referenced_works":["https://openalex.org/W4310561894","https://openalex.org/W4387596623","https://openalex.org/W4387995158","https://openalex.org/W4390529182"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W4402327032"],"abstract_inverted_index":{"A":[0],"Hybrid":[1],"Future":[2],"for":[3,6,138,472],"AIThe":[4],"drive":[5],"efficiency":[7,264],"brings":[8],"large":[9,76,391],"language":[10,77],"models":[11,78,80,137],"out":[12],"of":[13,24,31,41,55,63,111,124,156,175,178,185,193,210,241,270,300,359,369,387,450,453,469,481],"the":[14,22,27,64,67,94,112,121,168,191,205,246,251,267,279,301,331,339,349,365,371,430,451,467,478],"cloud.Nvidia's":[15],"rise":[16],"to":[17,85,135,182,190,203,215,336,343,407,412,429,455,458],"a":[18,39,108,141,144,221,239,295,310,315,324,390],"$2-trillion":[19],"valuation":[20],"at":[21,107,140,294,361],"beginning":[23],"2024":[25],"underlined":[26],"extraordinary":[28],"computing":[29,56],"demands":[30,309],"artificial":[32],"intelligence":[33],"systems":[34],"that":[35,45,201,256,404],"power":[36,54],"ChatGPT":[37],"and":[38,49,57,129,172,198,207,424,432,438,445,462],"host":[40],"other":[42],"cloud":[43,431,473],"services":[44,194],"create":[46],"videos,":[47],"music,":[48],"computer":[50],"programs":[51],"on":[52,75,120],"demand.The":[53],"memory":[58],"scaling":[59,105],"has":[60,99,117,143,188,329,335],"provided":[61],"much":[62,165],"impetus":[65],"behind":[66],"surge":[68],"in":[69,71,96,131,232,250,281,305,374,380,435],"interest":[70],"generative":[72],"AI":[73,363],"based":[74],"(LLMs).As":[79],"get":[81],"bigger":[82],"they":[83,383,464],"seem":[84],"harness":[86],"emergent":[87],"behavior,":[88],"making":[89],"them":[90],"more":[91,242,449],"useful.But,":[92],"as":[93,196],"growth":[95],"parameter":[97],"counts":[98],"easily":[100,236],"outstripped":[101],"Moore's":[102],"Law,":[103],"such":[104,195],"comes":[106],"high":[109],"cost.Much":[110],"concern":[113],"around":[114],"resource":[115],"usage":[116],"been":[118],"focused":[119],"enormous":[122,366],"arrays":[123],"graphics":[125],"processing":[126,220],"units":[127,373],"(GPUs)":[128],"accelerators":[130],"training":[132,437],"grids":[133],"used":[134,434],"train":[136],"weeks":[139],"time.Inferencing":[142],"far":[145],"lower":[146],"computational":[147],"demand":[148],"per":[149],"token":[150,317,326,334],"than":[151,243],"training,":[152,287],"but":[153],"an":[154,288],"influx":[155],"users":[157,414],"can":[158,290,394],"quickly":[159],"overwhelm":[160],"available":[161],"resources.That":[162],"limit":[163],"appears":[164],"sooner":[166],"if":[167],"queries":[169,401],"are":[170,377,405,415],"complex":[171],"contain":[173],"thousands":[174],"tokens,":[176],"each":[177,319,333,352],"which":[179,282],"roughly":[180],"equates":[181],"four":[183],"characters":[184],"text.The":[186],"problem":[187,274],"led":[189],"launch":[192],"GPT-For-Work":[197],"Artificial":[199,218],"Analysis":[200],"attempt":[202],"predict":[204],"financial":[206],"energy":[208],"cost":[209,273],"cloud-based":[211],"LLMs":[212,283,454],"before":[213,327],"deployment.According":[214],"estimates":[216],"from":[217],"Analysis,":[219],"million":[222,229],"tokens":[223],"using":[224],"OpenAI's":[225],"GPT4-Turbo":[226],"costs":[227],"$15.Twenty":[228],"requests":[230],"resulting":[231],"maximum-length":[233],"outputs":[234],"could":[235],"translate":[237],"into":[238],"bill":[240],"$1":[244],"million.However,":[245],"4o":[247],"version":[248],"launched":[249],"spring":[252],"would":[253],"likely":[254],"cut":[255],"number":[257],"by":[258,265,278,307,398],"50%,":[259],"although":[260],"it":[261,328],"also":[262],"gains":[263],"halving":[266],"maximum":[268],"length":[269],"its":[271],"output.The":[272],"is":[275],"made":[276],"worse":[277],"way":[280],"operate":[284],"during":[285],"inferencing.During":[286],"LLM":[289],"ingest":[291],"entire":[292,340],"sentences":[293],"time.This":[296],"makes":[297],"full":[298],"use":[299],"parallel":[302],"arithmetic":[303],"engines":[304],"GPUs.Inferencing,":[306],"contrast,":[308],"serial":[311],"feedback":[312],"loop":[313],"with":[314,389],"single":[316],"driving":[318],"iteration.\"The":[320],"model":[321],"can't":[322],"generate":[323],"new":[325],"produced":[330],"last.And":[332],"go":[337],"through":[338,348],"network.We":[341],"have":[342,477],"run":[344],"all":[345,440],"these":[346,441],"weights":[347],"compute":[350],"unit":[351],"time,\"":[353],"explained":[354],"Joseph":[355],"Soriaga,":[356],"senior":[357],"director":[358],"technology":[360],"Qualcomm":[362],"Research.Despite":[364],"memory-bandwidth":[367],"requirements":[368],"LLMs,":[370],"execution":[372],"GPUs":[375,470],"often":[376],"woefully":[378],"under-used":[379],"inferencing":[381],"because":[382],"must":[384],"work":[385,452],"serially.Operators":[386],"chatbots":[388],"user":[392],"base":[393],"gain":[395],"some":[396],"parallelization":[397],"batching":[399,410],"independent":[400],"together.Yet":[402],"applications":[403],"sensitive":[406],"latency":[408],"make":[409],"harder":[411],"justify.Also,":[413],"increasingly":[416],"concerned":[417],"about":[418],"privacy,":[419],"knowing":[420],"their":[421],"questions,":[422],"documents,":[423],"conversations":[425],"will":[426],"be":[427,456],"uploaded":[428],"possibly":[433],"downstream":[436],"fine-tuning.For":[439],"reasons,":[442],"many":[443],"researchers":[444],"commercial":[446],"suppliers":[447],"expect":[448],"offloaded":[457],"users'":[459],"own":[460],"devices":[461,476],"servers.Though":[463],"cannot":[465],"approach":[466],"performance":[468],"designed":[471],"servers,":[474],"consumer":[475],"clear":[479],"benefits":[480]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
