{"id":"https://openalex.org/W7160848377","doi":"https://doi.org/10.48550/arxiv.2605.07750","title":"Accelerating Precise End-to-End Simulation: Latency-Sensitive Many-core System Modeling","display_name":"Accelerating Precise End-to-End Simulation: Latency-Sensitive Many-core System Modeling","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160848377","doi":"https://doi.org/10.48550/arxiv.2605.07750"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.07750","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07750","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.07750","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121599309","display_name":"Yinrong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yinrong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092581645","display_name":"Fu Zexin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Zexin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135889720","display_name":"Yichao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yichao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078686831","display_name":"Germain Haugou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haugou, Germain","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135858107","display_name":"Chi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Chi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135833797","display_name":"Marco Bertuletti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bertuletti, Marco","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135859504","display_name":"Bowen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Bowen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135857109","display_name":"Luca Benini","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benini, Luca","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.48159998655319214,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.48159998655319214,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.34779998660087585,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.12269999831914902,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6940000057220459},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.5497000217437744},{"id":"https://openalex.org/keywords/interconnection","display_name":"Interconnection","score":0.5264000296592712},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.5091999769210815},{"id":"https://openalex.org/keywords/router","display_name":"Router","score":0.47200000286102295},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.44679999351501465},{"id":"https://openalex.org/keywords/design-space-exploration","display_name":"Design space exploration","score":0.4422999918460846},{"id":"https://openalex.org/keywords/systems-design","display_name":"Systems design","score":0.3700000047683716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8141999840736389},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6940000057220459},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.5497000217437744},{"id":"https://openalex.org/C123745756","wikidata":"https://www.wikidata.org/wiki/Q1665949","display_name":"Interconnection","level":2,"score":0.5264000296592712},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.5091999769210815},{"id":"https://openalex.org/C2775896111","wikidata":"https://www.wikidata.org/wiki/Q642560","display_name":"Router","level":2,"score":0.47200000286102295},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.44679999351501465},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.4422999918460846},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4343999922275543},{"id":"https://openalex.org/C31352089","wikidata":"https://www.wikidata.org/wiki/Q3750474","display_name":"Systems design","level":2,"score":0.3700000047683716},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3684000074863434},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.35589998960494995},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3443000018596649},{"id":"https://openalex.org/C93682380","wikidata":"https://www.wikidata.org/wiki/Q2025226","display_name":"Static timing analysis","level":2,"score":0.33059999346733093},{"id":"https://openalex.org/C110963975","wikidata":"https://www.wikidata.org/wiki/Q12070446","display_name":"Systems modeling","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3156999945640564},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C167343916","wikidata":"https://www.wikidata.org/wiki/Q6888384","display_name":"Modeling and simulation","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C77495112","wikidata":"https://www.wikidata.org/wiki/Q5358436","display_name":"Electronic system-level design and verification","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C47822265","wikidata":"https://www.wikidata.org/wiki/Q854457","display_name":"Complex system","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C108710211","wikidata":"https://www.wikidata.org/wiki/Q11538","display_name":"Mathematical proof","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.26510000228881836},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.2583000063896179},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.2549000084400177},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2547000050544739}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.07750","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07750","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.07750","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07750","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Modern":[0],"large":[1],"language":[2],"model":[3,128,134],"workloads":[4],"put":[5],"increasing":[6],"demands":[7],"on":[8],"parallel":[9],"compute":[10,40],"capability":[11],"and":[12,22,28,41,66,99,155,162,180,183,196],"on-chip":[13],"memory":[14,36],"capacity,":[15],"while":[16,119,139],"also":[17,148],"stressing":[18],"fine-grained":[19],"data":[20,45],"movement":[21],"synchronization.":[23],"These":[24],"trends":[25],"motivate":[26],"exploring":[27],"designing":[29],"many-core":[30,88],"accelerators":[31],"with":[32,96,135],"tightly":[33],"coupled":[34],"scratchpad":[35],"(SPM)":[37],"for":[38,86],"scalable":[39],"predictable,":[42],"explicitly":[43],"managed":[44],"access.":[46],"However,":[47],"this":[48],"architectural":[49],"shift":[50],"raises":[51],"two":[52],"challenges:":[53],"cycle-accurate":[54,131],"register-transfer":[55],"level":[56],"(RTL)":[57],"simulation":[58],"becomes":[59],"prohibitively":[60],"slow":[61],"as":[62,94],"system":[63],"complexity":[64],"grows,":[65],"performance":[67],"estimation":[68],"requires":[69],"precise":[70],"modeling":[71,84],"of":[72,111,174,187],"latency-sensitive":[73,87,112],"interconnect":[74,117,178],"behavior.":[75],"This":[76],"paper":[77],"presents":[78],"a":[79,100,130],"fast":[80],"yet":[81],"accurate":[82],"end-to-end":[83,159],"approach":[85,107],"architectures,":[89],"targeting":[90],"large-scale":[91],"instances":[92],"such":[93],"TeraNoC":[95],"1024":[97],"cores":[98],"4MiB":[101],"globally":[102],"shared":[103],"L1":[104],"SPM.":[105],"The":[106,146],"captures":[108],"timing":[109],"behavior":[110],"SPM":[113],"accesses":[114],"across":[115,152],"multiple":[116],"scales,":[118],"abstracting":[120],"non-essential":[121],"hardware":[122,163],"details.":[123],"Across":[124],"diverse":[125],"benchmarks,":[126],"the":[127],"tracks":[129],"RTL":[132],"golden":[133],"errors":[136],"below":[137],"7%,":[138],"delivering":[140],"up":[141],"to":[142,176,192],"115x":[143],"faster":[144],"simulation.":[145],"framework":[147],"provides":[149],"detailed":[150],"profiling":[151],"processing":[153],"elements":[154],"interconnect,":[156],"enabling":[157],"efficient":[158],"software":[160],"development":[161],"design":[164,184],"exploration.":[165],"Two":[166],"case":[167],"studies":[168],"demonstrate":[169],"its":[170],"practicality:":[171],"profiling-guided":[172],"optimization":[173],"FlashAttention-2":[175],"reduce":[177],"stalls":[179],"synchronization":[181],"overhead,":[182],"space":[185],"exploration":[186],"network-on-chip":[188],"(NoC)":[189],"router":[190],"remapping":[191],"alleviate":[193],"traffic":[194],"imbalance":[195],"improve":[197],"throughput.":[198]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-12T00:00:00"}
