{"id":"https://openalex.org/W7147156870","doi":"https://doi.org/10.48550/arxiv.2603.28381","title":"Warp-STAR: High-performance, Differentiable GPU-Accelerated Static Timing Analysis through Warp-oriented Parallel Orchestration","display_name":"Warp-STAR: High-performance, Differentiable GPU-Accelerated Static Timing Analysis through Warp-oriented Parallel Orchestration","publication_year":2026,"publication_date":"2026-03-30","ids":{"openalex":"https://openalex.org/W7147156870","doi":"https://doi.org/10.48550/arxiv.2603.28381"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.28381","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28381","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.28381","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067971879","display_name":"En-Ming Huang","orcid":"https://orcid.org/0000-0003-2196-2834"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, En-Ming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020028710","display_name":"Shih\u2010Hao Hung","orcid":"https://orcid.org/0000-0003-2043-2663"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hung, Shih-Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.45260000228881836,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.45260000228881836,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.26759999990463257,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.10559999942779541,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8913000226020813},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6147000193595886},{"id":"https://openalex.org/keywords/occam","display_name":"occam","score":0.48410001397132874},{"id":"https://openalex.org/keywords/static-timing-analysis","display_name":"Static timing analysis","score":0.4169999957084656},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4138999879360199},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.4009000062942505},{"id":"https://openalex.org/keywords/orchestration","display_name":"Orchestration","score":0.38760000467300415},{"id":"https://openalex.org/keywords/differentiable-function","display_name":"Differentiable function","score":0.3772999942302704}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8913000226020813},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7477999925613403},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6147000193595886},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5027999877929688},{"id":"https://openalex.org/C78469957","wikidata":"https://www.wikidata.org/wiki/Q838062","display_name":"occam","level":2,"score":0.48410001397132874},{"id":"https://openalex.org/C93682380","wikidata":"https://www.wikidata.org/wiki/Q2025226","display_name":"Static timing analysis","level":2,"score":0.4169999957084656},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4138999879360199},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.4009000062942505},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.38760000467300415},{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.3772999942302704},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35109999775886536},{"id":"https://openalex.org/C32946077","wikidata":"https://www.wikidata.org/wiki/Q618079","display_name":"Network analysis","level":2,"score":0.34860000014305115},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C97686452","wikidata":"https://www.wikidata.org/wiki/Q7604153","display_name":"Static analysis","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C64260653","wikidata":"https://www.wikidata.org/wiki/Q1194864","display_name":"Electronic design automation","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.25760000944137573},{"id":"https://openalex.org/C147297375","wikidata":"https://www.wikidata.org/wiki/Q6674930","display_name":"Look-ahead","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.25290000438690186},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.28381","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28381","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.28381","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28381","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Static":[0],"timing":[1],"analysis":[2,96],"(STA)":[3],"is":[4],"crucial":[5],"for":[6,93],"Electronic":[7],"Design":[8],"Automation":[9],"(EDA)":[10],"flows":[11],"but":[12],"remains":[13],"a":[14,43,63,75,82],"computational":[15],"bottleneck.":[16],"While":[17],"existing":[18],"GPU-based":[19,70],"STA":[20,46],"engines":[21],"are":[22],"faster":[23],"than":[24],"CPU,":[25],"they":[26],"suffer":[27],"from":[28],"inefficiencies,":[29],"particularly":[30],"intra-warp":[31],"load":[32],"imbalance":[33,51],"caused":[34],"by":[35,52],"irregular":[36],"circuit":[37],"graphs.":[38],"This":[39,60],"paper":[40],"introduces":[41],"Warp-STAR,":[42],"novel":[44],"GPU-accelerated":[45],"engine":[47],"that":[48],"eliminates":[49],"this":[50],"orchestrating":[53],"parallel":[54],"computations":[55],"at":[56],"the":[57],"warp":[58],"level.":[59],"approach":[61],"achieves":[62],"2.4X":[64],"speedup":[65,84],"over":[66,85],"previous":[67],"state-of-the-art":[68],"(SoTA)":[69],"STA.":[71],"When":[72],"integrated":[73],"into":[74],"timing-driven":[76],"global":[77],"placement":[78],"framework,":[79],"Warp-STAR":[80],"delivers":[81],"1.7X":[83],"SoTA":[86],"frameworks.":[87],"The":[88],"method":[89],"also":[90],"proves":[91],"effective":[92],"differentiable":[94],"gradient":[95],"with":[97],"minimal":[98],"overhead.":[99]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-02T00:00:00"}
