{"id":"https://openalex.org/W7162454159","doi":"https://doi.org/10.48550/arxiv.2605.25645","title":"Fine-Tuning and Serving Gemma 4 31B on Google Cloud TPU: A Technical Comparison with GPU Baselines","display_name":"Fine-Tuning and Serving Gemma 4 31B on Google Cloud TPU: A Technical Comparison with GPU Baselines","publication_year":2026,"publication_date":"2026-05-25","ids":{"openalex":"https://openalex.org/W7162454159","doi":"https://doi.org/10.48550/arxiv.2605.25645"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.25645","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25645","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.25645","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137009371","display_name":"Jatin Kishnani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kishnani, Jatin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030011411","display_name":"Mayank Goel","orcid":"https://orcid.org/0000-0003-1237-7545"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goel, Mayank","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136993924","display_name":"Amit Kumar Singh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singh, Amit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137061468","display_name":"Pulkit Agrawal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agrawal, Pulkit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137051143","display_name":"Sairanjan Mishra","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mishra, Sairanjan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6812000274658203,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6812000274658203,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.03799999877810478,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.02979999966919422,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5684000253677368},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.46970000863075256},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4009999930858612},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.3986000120639801},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.392300009727478},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.3637000024318695}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7730000019073486},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5684000253677368},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.46970000863075256},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4009999930858612},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.3986000120639801},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.392300009727478},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3637000024318695},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3091000020503998},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3046000003814697},{"id":"https://openalex.org/C2775941552","wikidata":"https://www.wikidata.org/wiki/Q25212305","display_name":"Isolation (microbiology)","level":2,"score":0.3009999990463257},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29120001196861267},{"id":"https://openalex.org/C4924752","wikidata":"https://www.wikidata.org/wiki/Q184148","display_name":"Plug-in","level":2,"score":0.2623000144958496},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.25645","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25645","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.25645","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25645","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.6798242330551147}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"the":[2,49,72,105,118,147,160,243,260],"first":[3],"end-to-end":[4],"demonstration":[5],"of":[6,22,52],"fine-tuning":[7],"and":[8,24,41,68,94,116,121,158,163,182,213,222,247],"serving":[9],"Google's":[10],"Gemma":[11,112,154,254],"4":[12,113,155,255],"31B":[13,257],"model":[14,30],"on":[15,34,64,114,156,259],"TPU":[16,23,37,42,134,193,204,261],"hardware,":[17],"providing":[18],"an":[19],"empirical":[20],"comparison":[21],"GPU":[25,129,187,207],"platforms":[26],"for":[27,39,45,253],"large":[28],"language":[29],"adaptation.":[31],"Using":[32],"LoRA":[33,82],"a":[35,58,95,126,167,185,239,251],"Google":[36],"v5p-8":[38],"training":[40,60,135,195],"v6e-8":[43,115,157,205],"(Trillium)":[44],"inference,":[46,102,144,203],"we":[47,103,145,179],"document":[48],"full":[50],"set":[51],"code-level":[53],"adaptations":[54,78],"required":[55,151],"to":[56,71,110,152,172],"port":[57],"GPU-native":[59],"recipe":[61,252],"-":[62,70],"built":[63],"PyTorch,":[65],"HuggingFace":[66],"TRL,":[67],"FSDP":[69],"JAX":[73],"+":[74],"Tunix/Qwix":[75],"stack.":[76],"These":[77],"span":[79],"mesh":[80],"configuration,":[81],"module":[83],"naming":[84],"conventions,":[85],"sharding":[86],"annotation":[87],"corrections,":[88],"gradient":[89],"checkpoint,":[90],"data":[91],"pipeline":[92],"restructuring,":[93],"custom":[96],"Orbax-to-safetensor":[97],"checkpoint":[98],"merging":[99],"procedure.":[100],"For":[101,143,202],"detail":[104],"vLLM-TPU":[106,148],"Docker":[107,149],"setup":[108,150],"necessary":[109],"serve":[111,153],"characterize":[117],"resulting":[119],"latency":[120,162],"throughput":[122,164,221],"profile.":[123],"Compared":[124],"with":[125,250],"similar-costing":[127],"2xH100":[128,186],"baseline":[130,188],"under":[131],"identical":[132,190],"hyperparameters,":[133],"completes":[136,194],"1.61x":[137,196],"faster":[138,197,224],"at":[139,198,208,216,226,234],"2.12x":[140,199],"lower":[141,200],"cost.":[142,201],"cover":[146],"explain":[159],"observed":[161],"characteristics":[165],"across":[166],"QPS":[168],"sweep":[169],"spanning":[170],"512":[171],"16k":[173],"input":[174],"tokens.":[175],"Across":[176],"both":[177],"workloads":[178],"compare":[180],"performance":[181],"cost":[183],"against":[184],"running":[189],"hyperparameters.":[191],"The":[192],"matches":[206],"short":[209],"context":[210],"(&lt;=2048":[211],"tokens)":[212],"decisively":[214],"outperforms":[215],"long":[217],"context:":[218],"66%":[219],"higher":[220],"23.6x":[223],"TTFT":[225],"4096-token":[227],"inputs":[228],"(61":[229],"ms":[230,233],"vs":[231],"1,443":[232],"QPS=4).":[235],"Our":[236],"work":[237],"removes":[238],"critical":[240],"gap":[241],"in":[242],"open":[244],"tooling":[245],"ecosystem":[246],"provides":[248],"practitioners":[249],"Dense":[256],"deployment":[258],"infrastructure.":[262]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-27T00:00:00"}
