{"id":"https://openalex.org/W4415003742","doi":"https://doi.org/10.1109/mlcad65511.2025.11189223","title":"ML-Enhanced Performance and Power Estimation for DNNs on Heterogenous SoCs","display_name":"ML-Enhanced Performance and Power Estimation for DNNs on Heterogenous SoCs","publication_year":2025,"publication_date":"2025-09-08","ids":{"openalex":"https://openalex.org/W4415003742","doi":"https://doi.org/10.1109/mlcad65511.2025.11189223"},"language":"en","primary_location":{"id":"doi:10.1109/mlcad65511.2025.11189223","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlcad65511.2025.11189223","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 ACM/IEEE 7th Symposium on Machine Learning for CAD (MLCAD)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022503279","display_name":"Surya Selvam","orcid":"https://orcid.org/0000-0002-0300-5478"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Surya Selvam","raw_affiliation_strings":["Purdue University,Elmore Family School of ECE"],"affiliations":[{"raw_affiliation_string":"Purdue University,Elmore Family School of ECE","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044634700","display_name":"Jacob R. Stevens","orcid":"https://orcid.org/0000-0002-7626-2846"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jacob R. Stevens","raw_affiliation_strings":["Purdue University,Elmore Family School of ECE"],"affiliations":[{"raw_affiliation_string":"Purdue University,Elmore Family School of ECE","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105369696","display_name":"Sujit Dey","orcid":"https://orcid.org/0000-0001-9671-3950"},"institutions":[{"id":"https://openalex.org/I160856358","display_name":"University of San Diego","ror":"https://ror.org/03jbbze48","country_code":"US","type":"education","lineage":["https://openalex.org/I160856358"]},{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sujit Dey","raw_affiliation_strings":["University of California San Diego,Department of ECE"],"affiliations":[{"raw_affiliation_string":"University of California San Diego,Department of ECE","institution_ids":["https://openalex.org/I160856358","https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065766721","display_name":"Anand Raghunathan","orcid":"https://orcid.org/0000-0002-4624-564X"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anand Raghunathan","raw_affiliation_strings":["Purdue University,Elmore Family School of ECE"],"affiliations":[{"raw_affiliation_string":"Purdue University,Elmore Family School of ECE","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5022503279"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29951695,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.6245999932289124},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5891000032424927},{"id":"https://openalex.org/keywords/tree-traversal","display_name":"Tree traversal","score":0.5820000171661377},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.49540001153945923},{"id":"https://openalex.org/keywords/execution-time","display_name":"Execution time","score":0.4796999990940094},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.4341000020503998},{"id":"https://openalex.org/keywords/graph-traversal","display_name":"Graph traversal","score":0.39309999346733093},{"id":"https://openalex.org/keywords/design-space-exploration","display_name":"Design space exploration","score":0.38999998569488525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8641999959945679},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.6245999932289124},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5891000032424927},{"id":"https://openalex.org/C140745168","wikidata":"https://www.wikidata.org/wiki/Q1210082","display_name":"Tree traversal","level":2,"score":0.5820000171661377},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.49540001153945923},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.4796999990940094},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.4341000020503998},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.42640000581741333},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4180999994277954},{"id":"https://openalex.org/C96333769","wikidata":"https://www.wikidata.org/wiki/Q907955","display_name":"Graph traversal","level":3,"score":0.39309999346733093},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.38999998569488525},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3743000030517578},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3497999906539917},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.30970001220703125},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.3043999969959259},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.296999990940094},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2874000072479248},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.2858000099658966},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.28529998660087585},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.27889999747276306},{"id":"https://openalex.org/C158207573","wikidata":"https://www.wikidata.org/wiki/Q5747224","display_name":"Heterogeneous network","level":4,"score":0.27810001373291016},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2669999897480011},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.2524999976158142},{"id":"https://openalex.org/C2779639559","wikidata":"https://www.wikidata.org/wiki/Q7661178","display_name":"Symbolic execution","level":3,"score":0.2513999938964844},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mlcad65511.2025.11189223","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlcad65511.2025.11189223","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 ACM/IEEE 7th Symposium on Machine Learning for CAD (MLCAD)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2101686415","https://openalex.org/W2116493640","https://openalex.org/W2132022337","https://openalex.org/W2183341477","https://openalex.org/W2295598076","https://openalex.org/W2964081807","https://openalex.org/W2995763407","https://openalex.org/W3110460514","https://openalex.org/W3113750219","https://openalex.org/W3135013702","https://openalex.org/W3157708976","https://openalex.org/W3158233068","https://openalex.org/W4220917304","https://openalex.org/W4226185248"],"related_works":[],"abstract_inverted_index":{"Efficient":[0],"deployment":[1,223],"of":[2,22,75,149,187,224],"Deep":[3],"Neural":[4],"Networks":[5],"(DNNs)":[6],"on":[7,49,168,176,226],"heterogeneous":[8,76,227],"Systems-on-Chip":[9],"(SoCs)":[10],"requires":[11,40],"navigating":[12],"a":[13,98,122],"large":[14],"and":[15,29,86,101,114,117,135,147,159,163,171,190,220],"complex":[16],"design":[17],"space":[18,39],"defined":[19],"by":[20],"combinations":[21],"choices":[23],"in":[24,92],"network":[25],"architectures,":[26],"operator-to-processor":[27],"mappings,":[28,130],"power/performance":[30],"operating":[31],"points":[32],"for":[33,112],"each":[34],"processor.":[35],"Exploring":[36],"this":[37,62],"vast":[38],"estimators":[41,56],"that":[42,104,126,210],"are":[43,140],"faster":[44,200],"than":[45,201],"even":[46],"real-time":[47],"execution":[48,78,145,157,161,208],"the":[50,73,144,177],"target":[51],"hardware.":[52],"Machine":[53],"learning":[54],"(ML)-based":[55],"have":[57],"been":[58],"proposed":[59],"to":[60,69,142,217],"address":[61],"challenge.":[63],"However,":[64],"prior":[65,194],"ML-based":[66],"methods":[67,195],"fail":[68],"accurately":[70],"capture":[71],"all":[72],"effects":[74],"parallel":[77],"such":[79],"as":[80],"parallelism":[81],"across":[82],"processors,":[83,134],"inter-processor":[84],"communication,":[85],"control-flow":[87],"dependencies":[88],"from":[89],"offloading,":[90],"resulting":[91],"significant":[93],"estimation":[94,185],"errors.We":[95],"introduce":[96],"CoCO-ML,":[97],"Concurrency,":[99],"Communication,":[100],"Offload-aware":[102],"framework":[103],"integrates":[105],"ML":[106,138],"models":[107,128,139],"with":[108],"classical":[109],"graph-based":[110],"algorithms":[111],"fast":[113],"accurate":[115],"performance":[116],"power":[118,148,164],"estimation.":[119],"CoCO-ML":[120,167,182,205],"constructs":[121],"Computation-Communication":[123],"Graph":[124],"(CCG)":[125],"explicitly":[127],"operator":[129],"data":[131],"transfers":[132],"between":[133],"offload":[136],"overheads.":[137],"used":[141],"predict":[143],"time":[146,162],"individual":[150],"operators,":[151],"while":[152,196],"symbolic":[153,207],"graph":[154],"traversal":[155],"produces":[156],"traces":[158,209],"end-to-end":[160],"estimates.We":[165],"evaluate":[166],"four":[169],"single-DNN":[170],"two":[172],"multi-DNN":[173],"workloads":[174],"executed":[175],"NVIDIA":[178],"Jetson":[179],"AGX":[180],"SoC.":[181],"achieves":[183],"mean":[184],"errors":[186],"6.54%":[188],"(performance)":[189],"12.51%":[191],"(power),":[192],"outperforming":[193],"being":[197],"over":[198],"1000\u00d7":[199],"native":[202],"execution.":[203],"Furthermore,":[204],"generates":[206],"expose":[211],"application":[212,218],"bottlenecks,":[213],"providing":[214],"actionable":[215],"insights":[216],"developers":[219],"facilitating":[221],"efficient":[222],"DNNs":[225],"SoCs.":[228]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
