{"id":"https://openalex.org/W7160336006","doi":"https://doi.org/10.48550/arxiv.2605.01423","title":"HepScript: A Dual-Use DSL for Human-AI Collaborative Data Analysis Workflows in High-Energy Physics","display_name":"HepScript: A Dual-Use DSL for Human-AI Collaborative Data Analysis Workflows in High-Energy Physics","publication_year":2026,"publication_date":"2026-05-02","ids":{"openalex":"https://openalex.org/W7160336006","doi":"https://doi.org/10.48550/arxiv.2605.01423"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.01423","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01423","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.01423","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133486542","display_name":"Junkun Jiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiao, Junkun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135312889","display_name":"Tong Liu","orcid":"https://orcid.org/0000-0001-5311-9687"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Tong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135391565","display_name":"Ke Li","orcid":"https://orcid.org/0000-0002-3694-1772"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135348993","display_name":"Weimin Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Weimin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104281612","display_name":"Yipu Liao","orcid":"https://orcid.org/0009-0000-1981-0044"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liao, Yipu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039949904","display_name":"B. Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Bolun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135373306","display_name":"Beijiang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Beijiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128560179","display_name":"Chang-Zheng Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Chang-Zheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135398658","display_name":"Yue Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.6043999791145325,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.6043999791145325,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.1111999973654747,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.03370000049471855,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5702000260353088},{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.5580999851226807},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.4887999892234802},{"id":"https://openalex.org/keywords/digital-subscriber-line","display_name":"Digital subscriber line","score":0.44359999895095825},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.37959998846054077},{"id":"https://openalex.org/keywords/domain-specific-language","display_name":"Domain-specific language","score":0.37119999527931213},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.36640000343322754},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.3562000095844269},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.3208000063896179}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7541999816894531},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.6050000190734863},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5720000267028809},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5702000260353088},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.5580999851226807},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.4887999892234802},{"id":"https://openalex.org/C201374245","wikidata":"https://www.wikidata.org/wiki/Q104534","display_name":"Digital subscriber line","level":2,"score":0.44359999895095825},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.37959998846054077},{"id":"https://openalex.org/C135257023","wikidata":"https://www.wikidata.org/wiki/Q691358","display_name":"Domain-specific language","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.36640000343322754},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3562000095844269},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.3208000063896179},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.3172999918460846},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.31520000100135803},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.31209999322891235},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C201677973","wikidata":"https://www.wikidata.org/wiki/Q1209840","display_name":"Specification language","level":2,"score":0.2989000082015991},{"id":"https://openalex.org/C147358964","wikidata":"https://www.wikidata.org/wiki/Q1200992","display_name":"Abstraction layer","level":3,"score":0.2976999878883362},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.296099990606308},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28600001335144043},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C15845906","wikidata":"https://www.wikidata.org/wiki/Q1172338","display_name":"Data exchange","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.2703999876976013},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.25839999318122864}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.01423","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01423","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.01423","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01423","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"escalating":[1],"data":[2,64],"scale":[3],"in":[4],"High-Energy":[5],"Physics":[6],"(HEP)":[7],"fuels":[8],"a":[9,22,52,57,70,79,139,160,167,175],"growing":[10],"aspiration":[11],"for":[12,62,86,97,151],"higher":[13],"analytical":[14],"efficiency.":[15],"While":[16],"Large":[17],"Language":[18,60],"Models":[19],"(LLMs)":[20],"offer":[21],"path":[23],"toward":[24,170],"automation":[25,196],"via":[26],"agentic":[27],"AI,":[28],"they":[29],"struggle":[30],"with":[31,159],"complex":[32],"scientific":[33],"workflows":[34],"that":[35,82],"require":[36],"deep":[37],"domain":[38],"knowledge":[39],"and":[40,89,190],"are":[41],"tightly":[42],"coupled":[43],"to":[44,146],"experiment-specific":[45],"codebases.":[46],"To":[47],"address":[48],"this,":[49],"we":[50],"introduce":[51],"methodology":[53],"centered":[54],"on":[55],"HepScript,":[56],"dual-use":[58],"Domain-Specific":[59],"(DSL)":[61],"HEP":[63,75],"analysis":[65,76,115,153],"workflows.":[66],"HepScript":[67,104],"serves":[68],"as":[69,180],"shared":[71],"formal":[72],"interface,":[73],"abstracting":[74],"logic":[77],"into":[78,117],"constrained":[80,136],"syntax":[81],"is":[83],"both":[84],"intuitive":[85],"human":[87,186],"experts":[88],"reliably":[90],"generable":[91],"by":[92,132],"AI":[93,144,188],"agents.":[94],"First":[95],"developed":[96],"the":[98,106,109,128],"Beijing":[99],"Spectrometer":[100],"III":[101],"(BESIII)":[102],"experiment,":[103],"hides":[105],"complexity":[107],"of":[108],"underlying":[110],"software":[111],"stack,":[112],"translating":[113],"high-level":[114],"intent":[116],"low-level,":[118],"production-ready":[119],"code.":[120],"In":[121],"our":[122],"case":[123],"studies,":[124],"this":[125],"abstraction":[126],"reduces":[127],"required":[129],"human-written":[130],"code":[131],"93\\%.":[133],"Crucially,":[134],"HepScript's":[135],"grammar":[137],"defines":[138],"tractable":[140],"action":[141],"space,":[142],"enabling":[143],"agents":[145],"autonomously":[147],"generate":[148],"executable":[149],"specifications":[150],"core":[152],"stages":[154],"directly":[155],"from":[156],"published":[157],"literature":[158],"95\\%":[161],"success":[162],"rate.":[163],"Our":[164],"work":[165],"demonstrates":[166],"scalable":[168],"pathway":[169],"human-AI":[171],"collaborative":[172],"systems,":[173],"where":[174],"formally":[176],"specified":[177],"DSL":[178],"acts":[179],"an":[181],"unambiguous":[182],"translation":[183],"layer":[184],"between":[185],"expertise,":[187],"automation,":[189],"production":[191],"environment,":[192],"rendering":[193],"previously":[194],"intractable":[195],"problems":[197],"solvable.":[198]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-06T00:00:00"}
