{"id":"https://openalex.org/W4416342642","doi":"https://doi.org/10.1109/socc66126.2025.11235386","title":"Exploring RISC-V Instruction-Level Optimization Through Macro-Operation Fusion for TensorFlow-based Models","display_name":"Exploring RISC-V Instruction-Level Optimization Through Macro-Operation Fusion for TensorFlow-based Models","publication_year":2025,"publication_date":"2025-09-29","ids":{"openalex":"https://openalex.org/W4416342642","doi":"https://doi.org/10.1109/socc66126.2025.11235386"},"language":null,"primary_location":{"id":"doi:10.1109/socc66126.2025.11235386","is_oa":false,"landing_page_url":"https://doi.org/10.1109/socc66126.2025.11235386","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 38th International System-on-Chip Conference (SOCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047096414","display_name":"Mohamed I. A. Othman","orcid":"https://orcid.org/0000-0002-0405-8918"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmad Othman","raw_affiliation_strings":["Technische Universit&#x00E4;t Dresden,Chair of Adaptive Dynamic Systems,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technische Universit&#x00E4;t Dresden,Chair of Adaptive Dynamic Systems,Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nico R\u00f6der","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nico R\u00f6der","raw_affiliation_strings":["Technische Universit&#x00E4;t Dresden,Chair of Adaptive Dynamic Systems,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technische Universit&#x00E4;t Dresden,Chair of Adaptive Dynamic Systems,Germany","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080510386","display_name":"Ahmed Kamaleldin","orcid":"https://orcid.org/0000-0002-7446-7741"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmed Kamaleldin","raw_affiliation_strings":["Technische Universit&#x00E4;t Dresden,Chair of Adaptive Dynamic Systems,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technische Universit&#x00E4;t Dresden,Chair of Adaptive Dynamic Systems,Germany","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060626530","display_name":"Diana G\u00f6hringer","orcid":"https://orcid.org/0000-0003-2571-8441"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Diana G\u00f6hringer","raw_affiliation_strings":["Technische Universit&#x00E4;t Dresden,Chair of Adaptive Dynamic Systems,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technische Universit&#x00E4;t Dresden,Chair of Adaptive Dynamic Systems,Germany","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34417895,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8287000060081482,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.8287000060081482,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.0997999981045723,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.007600000128149986,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.628000020980835},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5691999793052673},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.550599992275238},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5250999927520752},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5192999839782715},{"id":"https://openalex.org/keywords/microarchitecture","display_name":"Microarchitecture","score":0.3767000138759613}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7767000198364258},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.628000020980835},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5691999793052673},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.550599992275238},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5302000045776367},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5250999927520752},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5192999839782715},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4156999886035919},{"id":"https://openalex.org/C107598950","wikidata":"https://www.wikidata.org/wiki/Q259864","display_name":"Microarchitecture","level":2,"score":0.3767000138759613},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.26510000228881836},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.26460000872612},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/socc66126.2025.11235386","is_oa":false,"landing_page_url":"https://doi.org/10.1109/socc66126.2025.11235386","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 38th International System-on-Chip Conference (SOCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2980249900","https://openalex.org/W3102724434","https://openalex.org/W4235274905","https://openalex.org/W4288444508","https://openalex.org/W4308083742","https://openalex.org/W4310370770","https://openalex.org/W4365447480","https://openalex.org/W4401455319"],"related_works":[],"abstract_inverted_index":{"Macro-operation":[0],"fusion":[1,41,60,70,97,114],"represents":[2],"an":[3],"advanced":[4],"approach":[5],"to":[6,37,62],"optimizing":[7],"instruction":[8,40,56],"streams":[9],"with":[10],"the":[11,44,95,128],"goal":[12],"of":[13,82,104],"enhancing":[14],"processor":[15],"efficiency,":[16],"offering":[17],"potential":[18],"benefits":[19],"for":[20,47],"compute-intensive":[21],"domains":[22],"including":[23],"deep":[24,90,108],"learning":[25,91,109],"and":[26,59,64,68,74,88,111,133,141],"large":[27,116],"language":[28,117],"models.":[29,49],"This":[30],"work":[31],"presents":[32],"a":[33,79],"software-based":[34],"framework":[35,51],"designed":[36],"systematically":[38],"explore":[39],"strategies":[42],"targeting":[43,72],"RISC-V":[45,139],"ISA":[46],"TensorFlow-based":[48],"The":[50],"combines":[52],"model":[53],"transformation,":[54],"dynamic":[55,105],"trace":[57],"analysis,":[58],"simulation":[61],"identify":[63],"evaluate":[65],"both":[66],"established":[67],"novel":[69],"idioms":[71],"integer":[73],"floating-point":[75],"operations.":[76],"Evaluation":[77],"across":[78],"diverse":[80],"set":[81],"benchmarks,":[83],"spanning":[84],"classical":[85],"computational":[86],"tasks":[87],"modern":[89],"workloads,":[92],"demonstrates":[93],"that":[94],"proposed":[96],"techniques":[98],"effectively":[99],"fuse,":[100],"on":[101],"average,":[102],"30%":[103],"instructions":[106],"in":[107,115],"models,":[110],"over":[112],"60%":[113],"models":[118],"such":[119],"as":[120],"GPT-2.":[121],"These":[122],"results":[123],"provide":[124],"valuable":[125],"insights":[126],"into":[127],"trade-offs":[129],"between":[130],"software-driven":[131],"optimization":[132],"necessary":[134],"hardware":[135],"extensions,":[136],"informing":[137],"future":[138],"microarchitecture":[140],"compiler":[142],"design":[143],"aimed":[144],"at":[145],"maximizing":[146],"instruction-level":[147],"parallelism.":[148]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-17T00:00:00"}
