{"id":"https://openalex.org/W7154506812","doi":"https://doi.org/10.48550/arxiv.2604.12090","title":"Evaluating Cross-Architecture Performance Modeling of Distributed ML Workloads Using StableHLO","display_name":"Evaluating Cross-Architecture Performance Modeling of Distributed ML Workloads Using StableHLO","publication_year":2026,"publication_date":"2026-04-13","ids":{"openalex":"https://openalex.org/W7154506812","doi":"https://doi.org/10.48550/arxiv.2604.12090"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.12090","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12090","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.12090","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003103657","display_name":"Jonas Svedas","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Svedas, Jonas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059602336","display_name":"Nathan Laubeuf","orcid":"https://orcid.org/0000-0002-1592-755X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Laubeuf, Nathan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133637182","display_name":"Ryan Harvey","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Harvey, Ryan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133722044","display_name":"Arjun Singh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singh, Arjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133693474","display_name":"Changhai Man","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Man, Changhai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5117071312","display_name":"Abubakr Nada","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nada, Abubakr","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124285599","display_name":"Tushar Krishna","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krishna, Tushar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130960288","display_name":"James Myers","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Myers, James","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133629094","display_name":"Debjyoti Bhattacharjee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhattacharjee, Debjyoti","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5003103657"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.44920000433921814,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.44920000433921814,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.1281999945640564,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.04879999905824661,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.7355999946594238},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.692799985408783},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.6804999709129333},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4487999975681305},{"id":"https://openalex.org/keywords/performance-prediction","display_name":"Performance prediction","score":0.4043999910354614},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.3984000086784363},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.32350000739097595},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.3109999895095825}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8668000102043152},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.7355999946594238},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.692799985408783},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.6804999709129333},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5760999917984009},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4487999975681305},{"id":"https://openalex.org/C2777115002","wikidata":"https://www.wikidata.org/wiki/Q7168246","display_name":"Performance prediction","level":2,"score":0.4043999910354614},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.3984000086784363},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3246000111103058},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.32350000739097595},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.322299987077713},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.29490000009536743},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C42112101","wikidata":"https://www.wikidata.org/wiki/Q5283146","display_name":"Distributed design patterns","level":3,"score":0.26440000534057617},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.25200000405311584},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.12090","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12090","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.12090","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12090","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.500374436378479,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Predicting":[0],"the":[1,165,201],"performance":[2,67,88,138,187],"of":[3,69],"large-scale":[4],"distributed":[5,32,70,123,185],"machine":[6],"learning":[7],"(ML)":[8],"workloads":[9,99,132],"across":[10,102,116,140,189],"multiple":[11,87],"accelerator":[12,190],"architectures":[13,141,191],"remains":[14],"a":[15,59,76,82,180],"central":[16],"challenge":[17],"in":[18,170],"ML":[19,71,186,202],"system":[20,203],"design.":[21],"Existing":[22],"GPU":[23,172],"and":[24,46,65,93,104,127,142,148,164,192,198],"TPU":[25],"focused":[26],"simulators":[27,34],"are":[28,100],"typically":[29],"architecture-specific,":[30],"while":[31,144],"training":[33,131],"rely":[35],"on":[36],"workload-specific":[37],"analytical":[38],"models":[39],"or":[40],"costly":[41],"post-execution":[42],"traces,":[43],"limiting":[44],"portability":[45],"cross-platform":[47],"comparison.":[48],"This":[49],"work":[50],"evaluates":[51],"whether":[52],"MLIR's":[53],"StableHLO":[54,135,178],"dialect":[55],"can":[56],"serve":[57],"as":[58],"unified":[60],"workload":[61,84],"representation":[62,85],"for":[63,160,183],"cross-architecture":[64],"cross-fidelity":[66],"modeling":[68,117,188],"workloads.":[72],"The":[73],"study":[74],"establishes":[75],"StableHLO-based":[77],"simulation":[78],"methodology":[79,166],"that":[80,134,177],"maps":[81],"single":[83],"onto":[86],"models,":[89],"spanning":[90],"analytical,":[91],"profiling-based,":[92],"simulator-driven":[94],"predictors.":[95],"Using":[96],"this":[97],"methodology,":[98],"evaluated":[101,152],"GPUs":[103],"TPUs":[105],"without":[106],"requiring":[107],"access":[108],"to":[109],"scaled-out":[110],"physical":[111],"systems,":[112],"enabling":[113],"systematic":[114],"comparison":[115],"fidelities.":[118],"An":[119],"empirical":[120],"evaluation":[121,196],"covering":[122],"GEMM":[124],"kernels,":[125],"ResNet,":[126],"large":[128],"language":[129],"model":[130],"demonstrates":[133],"preserves":[136],"relative":[137],"trends":[139],"fidelities,":[143],"exposing":[145],"accuracy":[146],"trade-offs":[147],"simulator":[149],"limitations.":[150],"Across":[151],"scenarios,":[153],"prediction":[154],"errors":[155],"remain":[156],"within":[157],"practical":[158],"bounds":[159],"early-stage":[161],"design":[162,204],"exploration,":[163],"reveals":[167],"fidelity-dependent":[168],"limitations":[169],"existing":[171],"simulators.":[173],"These":[174],"results":[175],"indicate":[176],"provides":[179],"viable":[181],"foundation":[182],"unified,":[184],"simulators,":[193],"supporting":[194],"reusable":[195],"workflows":[197],"cross-validation":[199],"throughout":[200],"process.":[205]},"counts_by_year":[],"updated_date":"2026-04-16T06:09:31.884825","created_date":"2026-04-16T00:00:00"}
