{"id":"https://openalex.org/W7115914205","doi":"https://doi.org/10.55056/jec.1047","title":"Performance analysis of localised large language models in resource-constrained edge for Python and Rust APIs","display_name":"Performance analysis of localised large language models in resource-constrained edge for Python and Rust APIs","publication_year":2025,"publication_date":"2025-12-18","ids":{"openalex":"https://openalex.org/W7115914205","doi":"https://doi.org/10.55056/jec.1047"},"language":"en","primary_location":{"id":"doi:10.55056/jec.1047","is_oa":true,"landing_page_url":"https://doi.org/10.55056/jec.1047","pdf_url":"https://acnsci.org/journal/index.php/jec/article/download/1047/944","source":{"id":"https://openalex.org/S4387288704","display_name":"Journal of Edge Computing","issn_l":"2837-181X","issn":["2837-181X"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4366728417","host_organization_name":"Academy of Cognitive and Natural Sciences","host_organization_lineage":["https://openalex.org/P4366728417"],"host_organization_lineage_names":["Academy of Cognitive and Natural Sciences"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Edge Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://acnsci.org/journal/index.php/jec/article/download/1047/944","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Partha Pratim Ray","orcid":"https://orcid.org/0000-0003-2306-2792"},"institutions":[{"id":"https://openalex.org/I43475992","display_name":"Sikkim University","ror":"https://ror.org/00wa05t61","country_code":"IN","type":"education","lineage":["https://openalex.org/I43475992"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Partha Pratim Ray","raw_affiliation_strings":["Sikkim University"],"raw_orcid":"https://orcid.org/0000-0003-2306-2792","affiliations":[{"raw_affiliation_string":"Sikkim University","institution_ids":["https://openalex.org/I43475992"]}]},{"author_position":"last","author":{"id":null,"display_name":"Mohan Pratap Pradhan","orcid":"https://orcid.org/0009-0007-8731-764X"},"institutions":[{"id":"https://openalex.org/I43475992","display_name":"Sikkim University","ror":"https://ror.org/00wa05t61","country_code":"IN","type":"education","lineage":["https://openalex.org/I43475992"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Mohan Pratap Pradhan","raw_affiliation_strings":["Sikkim University"],"raw_orcid":"https://orcid.org/0009-0007-8731-764X","affiliations":[{"raw_affiliation_string":"Sikkim University","institution_ids":["https://openalex.org/I43475992"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.3589,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.92856498,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"5","issue":"1","first_page":"47","last_page":"89"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.10279999673366547,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.10279999673366547,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.09690000116825104,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.08820000290870667,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.7946000099182129},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5519000291824341},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.49320000410079956},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4447999894618988},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4099999964237213},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.40059998631477356},{"id":"https://openalex.org/keywords/rust","display_name":"Rust (programming language)","score":0.34599998593330383},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.32420000433921814}],"concepts":[{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.7946000099182129},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.715399980545044},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5519000291824341},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.49320000410079956},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4447999894618988},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4174000024795532},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4099999964237213},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.40059998631477356},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3806000053882599},{"id":"https://openalex.org/C197781089","wikidata":"https://www.wikidata.org/wiki/Q575650","display_name":"Rust (programming language)","level":2,"score":0.34599998593330383},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.33219999074935913},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.31200000643730164},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.31150001287460327},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.3061999976634979},{"id":"https://openalex.org/C2988105877","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference system","level":5,"score":0.29159998893737793},{"id":"https://openalex.org/C151552104","wikidata":"https://www.wikidata.org/wiki/Q7705809","display_name":"Test suite","level":4,"score":0.28839999437332153},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28690001368522644},{"id":"https://openalex.org/C198370458","wikidata":"https://www.wikidata.org/wiki/Q586459","display_name":"Type inference","level":3,"score":0.2863999903202057},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2825999855995178},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.27140000462532043},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.26010000705718994}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.55056/jec.1047","is_oa":true,"landing_page_url":"https://doi.org/10.55056/jec.1047","pdf_url":"https://acnsci.org/journal/index.php/jec/article/download/1047/944","source":{"id":"https://openalex.org/S4387288704","display_name":"Journal of Edge Computing","issn_l":"2837-181X","issn":["2837-181X"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4366728417","host_organization_name":"Academy of Cognitive and Natural Sciences","host_organization_lineage":["https://openalex.org/P4366728417"],"host_organization_lineage_names":["Academy of Cognitive and Natural Sciences"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Edge Computing","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:a28716437fd543c38cda28c895419246","is_oa":true,"landing_page_url":"https://doaj.org/article/a28716437fd543c38cda28c895419246","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Edge Computing, Vol 5, Iss 1 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.55056/jec.1047","is_oa":true,"landing_page_url":"https://doi.org/10.55056/jec.1047","pdf_url":"https://acnsci.org/journal/index.php/jec/article/download/1047/944","source":{"id":"https://openalex.org/S4387288704","display_name":"Journal of Edge Computing","issn_l":"2837-181X","issn":["2837-181X"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4366728417","host_organization_name":"Academy of Cognitive and Natural Sciences","host_organization_lineage":["https://openalex.org/P4366728417"],"host_organization_lineage_names":["Academy of Cognitive and Natural Sciences"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Edge Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7115914205.pdf","grobid_xml":"https://content.openalex.org/works/W7115914205.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W4362489973","https://openalex.org/W4391116828","https://openalex.org/W4391957569","https://openalex.org/W4395020691","https://openalex.org/W4399352194","https://openalex.org/W4400277194","https://openalex.org/W4402284085","https://openalex.org/W4402671659","https://openalex.org/W4403125540","https://openalex.org/W4403486309","https://openalex.org/W4403536783","https://openalex.org/W4404134026","https://openalex.org/W4404494894","https://openalex.org/W4405717632","https://openalex.org/W4406650295","https://openalex.org/W4408010674","https://openalex.org/W4408863244","https://openalex.org/W4409362928","https://openalex.org/W4410227661","https://openalex.org/W4410987966","https://openalex.org/W4411267085","https://openalex.org/W4411551908","https://openalex.org/W4411552197","https://openalex.org/W4411552217","https://openalex.org/W4413120526","https://openalex.org/W4415797390","https://openalex.org/W4416033994","https://openalex.org/W4417170249"],"related_works":[],"abstract_inverted_index":{"Edge":[0],"deployments":[1],"of":[2,16,33,82],"large":[3],"language":[4],"models":[5,192,284],"(LLMs)":[6],"often":[7],"suffer":[8],"from":[9,121,134],"significant":[10,215],"latency":[11],"due":[12],"to":[13,126,137],"the":[14,220],"overhead":[15,188],"high-level":[17],"client":[18,280],"runtimes":[19],"on":[20,48,247],"resource-constrained":[21],"hardware.":[22],"To":[23],"address":[24],"this":[25],"challenge,":[26],"we":[27,256],"conducted":[28],"a":[29,49,73,79,102],"side-by-side":[30],"performance":[31],"analysis":[32],"four":[34],"quantised":[35,283],"LLMs":[36],"\u2013":[37,47,85,97,162,184],"Llama":[38,131,166],"3.2:1b,":[39,132,167],"Gemma":[40,171],"3:1b,":[41,172],"Granite":[42,176],"3.1-MoE:1b,":[43],"and":[44,64,78,94,133,178,204,240,243,252,269,282],"Qwen":[45,141,182],"2.5:0.5b":[46,183],"Raspberry":[50],"Pi":[51],"4":[52],"Model":[53],"B":[54],"(8":[55],"GB":[56],"LPDDR4,":[57],"quad-core":[58],"ARM":[59],"Cortex-A72)":[60],"using":[61],"both":[62,155],"Python":[63],"Rust":[65,111],"API":[66],"clients.":[67],"Each":[68],"model":[69,117],"was":[70,98],"served":[71],"via":[72],"local":[74],"Ollama":[75],"inference":[76,227],"server,":[77],"fixed":[80],"suite":[81],"twenty":[83],"prompts":[84],"covering":[86],"factual":[87],"retrieval,":[88],"arithmetic":[89],"reasoning,":[90],"translation,":[91],"code":[92],"synthesis,":[93],"creative":[95],"generation":[96],"executed":[99],"sequentially":[100],"with":[101],"two-second":[103],"inter-request":[104],"delay,":[105],"yielding":[106],"160":[107],"measurements":[108],"per":[109],"client.":[110],"markedly":[112],"reduces":[113],"cold-start":[114],"delays:":[115],"mean":[116],"load":[118],"times":[119],"fall":[120],"1":[122],"648.7":[123],"ms":[124,128,136,139],"(Python)":[125],"52.8":[127],"(Rust)":[129],"for":[130,140,165,170,175,181,278],"607.0":[135],"171.3":[138],"2.5:0.5b.":[142],"Corresponding":[143],"end-to-end":[144],"latencies":[145],"decrease":[146],"by":[147],"1.4-2.0":[148],"s":[149],"across":[150],"models.":[151],"In":[152],"warm-start":[153],"conditions,":[154],"clients":[156],"deliver":[157],"nearly":[158],"identical":[159],"decoding":[160],"throughput":[161,223],"\u22482.7":[163],"tokens/s":[164,169,174,180],"4.4":[168],"7.4":[173],"3.1-MoE,":[177],"8.6":[179],"indicating":[185],"that":[186,209],"runtime":[187],"is":[189],"negligible":[190],"once":[191],"are":[193,213,228],"loaded.":[194],"Rigorous":[195],"statistical":[196],"testing,":[197],"including":[198],"paired":[199],"t-tests,":[200],"Mann-Whitney":[201],"U":[202],"tests,":[203],"bootstrap":[205],"confidence":[206],"intervals,":[207],"confirms":[208],"Rust\u2019s":[210],"coldstart":[211],"advantages":[212],"highly":[214],"(p":[216],"&lt;":[217],"0.01).":[218],"At":[219],"same":[221],"time,":[222],"differences":[224],"in":[225,235,260,285],"steady-state":[226],"not":[229],"statistically":[230],"meaningful.":[231],"We":[232],"discuss":[233],"limitations":[234],"platform":[236],"specificity,":[237],"quantisation":[238],"approaches,":[239],"prompt":[241],"diversity,":[242],"outline":[244],"future":[245],"work":[246],"heterogeneous":[248],"accelerators,":[249],"adaptive":[250],"scheduling,":[251],"ondevice":[253],"fine-tuning.":[254],"Finally,":[255],"highlight":[257],"practical":[258],"applications":[259],"smart":[261],"agriculture,":[262],"healthcare":[263],"monitoring,":[264],"industrial":[265],"IoT,":[266],"autonomous":[267],"robotics,":[268],"offline":[270],"educational":[271],"tools.":[272],"This":[273],"benchmark":[274],"furnishes":[275],"actionable":[276],"guidelines":[277],"selecting":[279],"languages":[281],"edge":[286],"AI":[287],"scenarios.":[288]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-19T00:00:00"}
