{"id":"https://openalex.org/W7135215141","doi":"https://doi.org/10.48550/arxiv.2603.11139","title":"H2LooP Spark Preview: Continual Pretraining of Large Language Models for Low-Level Embedded Systems Code","display_name":"H2LooP Spark Preview: Continual Pretraining of Large Language Models for Low-Level Embedded Systems Code","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135215141","doi":"https://doi.org/10.48550/arxiv.2603.11139"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.11139","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11139","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.11139","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129001819","display_name":"Amit Kumar Singh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singh, Amit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129037856","display_name":"Vedant Nipane","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nipane, Vedant","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111774389","display_name":"Pulkit Agrawal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agrawal, Pulkit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128959904","display_name":"Jatin Kishnani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kishnani, Jatin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129548575","display_name":"Sairanjan Mishra","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mishra, Sairanjan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.14710000157356262,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.14710000157356262,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.10670000314712524,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.0763000026345253,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.8396999835968018},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5867000222206116},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5418000221252441},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.5329999923706055},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.5159000158309937},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5123999714851379},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5085999965667725},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4514999985694885}],"concepts":[{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.8396999835968018},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8187000155448914},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5867000222206116},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5547999739646912},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5418000221252441},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.5329999923706055},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.5159000158309937},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5123999714851379},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5085999965667725},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4514999985694885},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.44679999351501465},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3571999967098236},{"id":"https://openalex.org/C135257023","wikidata":"https://www.wikidata.org/wiki/Q691358","display_name":"Domain-specific language","level":2,"score":0.3366999924182892},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3280999958515167},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.31929999589920044},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.30399999022483826},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C19024347","wikidata":"https://www.wikidata.org/wiki/Q211496","display_name":"High-level programming language","level":3,"score":0.27900001406669617},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C111065885","wikidata":"https://www.wikidata.org/wiki/Q1189053","display_name":"Fuzz testing","level":3,"score":0.26969999074935913},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2540999948978424}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.11139","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11139","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.11139","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11139","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,64],"models":[2,179],"(LLMs)":[3],"demonstrate":[4],"strong":[5],"code":[6,149],"generation":[7],"abilities":[8],"in":[9,16,44,109,168],"general-purpose":[10],"programming":[11],"languages":[12],"but":[13],"remain":[14],"limited":[15],"specialized":[17,185],"domains":[18],"such":[19],"as":[20,196],"low-level":[21],"embedded":[22,68,95,125,154],"systems":[23,69,96,183],"programming.":[24],"This":[25],"domain":[26,70],"involves":[27],"hardware":[28,38],"register":[29],"manipulation,":[30],"vendor-specific":[31],"SDKs,":[32],"real-time":[33],"operating":[34],"system":[35],"APIs,":[36],"and":[37,141,163],"abstraction":[39],"layers":[40],"that":[41,58,172],"are":[42],"underrepresented":[43],"standard":[45],"pretraining":[46,55,128,175],"corpora.":[47],"We":[48,188],"introduce":[49],"H2LooP":[50],"Spark":[51],"Preview,":[52],"a":[53],"continual":[54,174],"(CPT)":[56],"pipeline":[57],"adapts":[59],"the":[60,67,103,190],"OLMo-3-7B-a":[61],"fully":[62],"open":[63],"model":[65,158],"to":[66,180],"using":[71,102],"BF16":[72],"LoRA":[73,131],"with":[74,129],"rank-stabilized":[75],"scaling":[76],"on":[77,165,184,194],"8":[78,166],"NVIDIA":[79],"H100":[80],"GPUs.":[81],"Our":[82],"training":[83,192],"corpus":[84],"is":[85],"constructed":[86],"from":[87],"repository-datasheet":[88],"pairs":[89],"covering":[90],"100B":[91],"tokens":[92,122],"of":[93],"raw":[94],"data":[97],"across":[98,123],"117":[99],"manufacturers,":[100],"processed":[101],"hierarchical":[104],"datasheet-to-code":[105],"mapping":[106],"approach":[107],"proposed":[108],"SpecMap":[110],"(Nipane":[111],"et":[112],"al.,":[113],"2026).":[114],"The":[115],"resulting":[116],"curated":[117],"dataset":[118],"split":[119],"contains":[120],"23.5B":[121],"13":[124,153],"domains.":[126],"Continual":[127],"high-rank":[130],"(r=512)":[132],"yields":[133],"substantial":[134],"gains,":[135],"reducing":[136],"in-domain":[137],"perplexity":[138,144],"by":[139,145],"70.4%":[140],"held-out":[142],"repository":[143],"66.1%.":[146],"On":[147],"generative":[148],"completion":[150],"benchmarks":[151],"spanning":[152],"domains,":[155],"our":[156],"7B":[157],"outperforms":[159],"Claude":[160],"Opus":[161],"4.6":[162],"Qwen3-Coder-30B":[164],"categories":[167],"token":[169],"accuracy,":[170],"showing":[171],"targeted":[173],"enables":[176],"smaller":[177],"open-weight":[178],"rival":[181],"frontier":[182],"technical":[186],"tasks.":[187],"release":[189],"production":[191],"checkpoint":[193],"Huggingface":[195],"an":[197],"open-source":[198],"artifact.":[199]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-14T00:00:00"}
