{"id":"https://openalex.org/W7155179510","doi":"https://doi.org/10.48550/arxiv.2604.19342","title":"Are Large Language Models Economically Viable for Industry Deployment?","display_name":"Are Large Language Models Economically Viable for Industry Deployment?","publication_year":2026,"publication_date":"2026-04-21","ids":{"openalex":"https://openalex.org/W7155179510","doi":"https://doi.org/10.48550/arxiv.2604.19342"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.19342","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19342","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.19342","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134230359","display_name":"Abdullah Mohammad","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mohammad, Abdullah","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123504691","display_name":"Sushant Kumar Ray","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ray, Sushant Kumar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074019706","display_name":"Pushkar Arora","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arora, Pushkar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068692517","display_name":"Rafiq Ali","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ali, Rafiq","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119042124","display_name":"Ebad Shabbir","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shabbir, Ebad","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134246662","display_name":"Gautam Siddharth Kashyap","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kashyap, Gautam Siddharth","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012051562","display_name":"Jiechao Gao","orcid":"https://orcid.org/0000-0003-0628-1416"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Jiechao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134307879","display_name":"Usman Naseem","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Naseem, Usman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5134230359"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.2493000030517578,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.2493000030517578,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.09179999679327011,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.0820000022649765,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6773999929428101},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.45890000462532043},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.37700000405311584},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.37290000915527344},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.3693999946117401},{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.3601999878883362},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.35589998960494995},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.3336000144481659}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6773999929428101},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.635200023651123},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.45890000462532043},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.37700000405311584},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.37290000915527344},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.3601999878883362},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.35589998960494995},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.3336000144481659},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.3018999993801117},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.3001999855041504},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C2776650193","wikidata":"https://www.wikidata.org/wiki/Q264661","display_name":"Obstacle","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C59594135","wikidata":"https://www.wikidata.org/wiki/Q5249242","display_name":"Decision model","level":2,"score":0.25360000133514404}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.19342","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19342","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.19342","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19342","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6942010521888733,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Generative":[0],"AI-powered":[1],"by":[2,185],"Large":[3],"Language":[4],"Models":[5],"(LLMs)-is":[6],"increasingly":[7],"deployed":[8],"in":[9,62,134,152,197],"industry":[10],"across":[11,78,93,142],"healthcare":[12],"decision":[13],"support,":[14],"financial":[15],"analytics,":[16],"enterprise":[17],"retrieval,":[18],"and":[19,25,41,59,90,113,124,144,164],"conversational":[20],"automation,":[21],"where":[22],"reliability,":[23],"efficiency,":[24,119],"cost":[26],"control":[27],"are":[28],"critical.":[29],"In":[30],"such":[31],"settings,":[32],"models":[33],"must":[34],"satisfy":[35],"strict":[36],"constraints":[37],"on":[38,82],"energy,":[39],"latency,":[40],"hardware":[42,120],"utilization-not":[43],"accuracy":[44],"alone.":[45],"Yet":[46],"prevailing":[47,192],"evaluation":[48],"pipelines":[49],"remain":[50],"accuracy-centric,":[51],"creating":[52],"a":[53,130],"Deployment-Evaluation":[54],"Gap-the":[55],"absence":[56],"of":[57],"operational":[58],"economic":[60,143],"criteria":[61],"model":[63],"assessment.":[64],"To":[65],"address":[66],"this":[67],"gap,":[68],"we":[69,97],"present":[70],"EDGE-EVAL-a":[71],"industry-oriented":[72],"benchmarking":[73],"framework":[74],"that":[75],"evaluates":[76],"LLMs":[77],"their":[79],"full":[80],"lifecycle":[81],"legacy":[83],"NVIDIA":[84],"Tesla":[85],"T4":[86],"GPUs.":[87],"Benchmarking":[88],"LLaMA":[89],"Qwen":[91],"variants":[92],"three":[94],"industrial":[95],"tasks,":[96],"introduce":[98],"five":[99],"deployment":[100],"metrics-Economic":[101],"Break-Even":[102],"(Nbreak),":[103],"Intelligence-Per-Watt":[104],"(IPW":[105],"),":[106],"System":[107],"Density":[108],"(\\r{ho}sys),":[109],"Cold-Start":[110],"Tax":[111],"(Ctax),":[112],"Quantization":[114],"Fidelity":[115],"(Qret)-capturing":[116],"profitability,":[117],"energy":[118,184],"scaling,":[121],"serverless":[122],"feasibility,":[123],"compression":[125],"safety.":[126],"Our":[127],"results":[128],"reveal":[129],"clear":[131],"efficiency":[132,175],"frontier-models":[133],"the":[135],"&lt;2B":[136],"parameter":[137],"class":[138],"dominate":[139],"larger":[140],"baselines":[141],"ecological":[145],"dimensions.":[146],"LLaMA-3.2-1B":[147],"(INT4)":[148],"achieves":[149],"ROI":[150],"break-even":[151],"14":[153],"requests":[154],"(median),":[155],"delivers":[156],"3x":[157],"higher":[158],"energy-normalized":[159],"intelligence":[160],"than":[161],"7B":[162],"models,":[163],"exceeds":[165],"6,900":[166],"tokens/s/GB":[167],"under":[168],"4-bit":[169],"quantization.":[170],"We":[171],"further":[172],"uncover":[173],"an":[174],"anomaly-while":[176],"QLoRA":[177],"reduces":[178],"memory":[179],"footprint,":[180],"it":[181],"increases":[182],"adaptation":[183],"up":[186],"to":[187],"7x":[188],"for":[189],"small":[190],"models-challenging":[191],"assumptions":[193],"about":[194],"quantization-aware":[195],"training":[196],"edge":[198],"deployment.":[199]},"counts_by_year":[],"updated_date":"2026-04-23T06:20:18.424754","created_date":"2026-04-23T00:00:00"}
