{"id":"https://openalex.org/W7117704932","doi":"https://doi.org/10.1109/mcsoc67473.2025.00017","title":"A Study on the Performance and Usability of Managed Memory and Unified Memory for Accelerating Numerical Calculation Program","display_name":"A Study on the Performance and Usability of Managed Memory and Unified Memory for Accelerating Numerical Calculation Program","publication_year":2025,"publication_date":"2025-12-15","ids":{"openalex":"https://openalex.org/W7117704932","doi":"https://doi.org/10.1109/mcsoc67473.2025.00017"},"language":"en","primary_location":{"id":"doi:10.1109/mcsoc67473.2025.00017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mcsoc67473.2025.00017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 18th International Symposium on Embedded Multicore/Many-core Systems-on-Chip (MCSoC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://catalog.lib.kyushu-u.ac.jp/opac_download_md/7420563/7420563.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080142917","display_name":"Satoshi Ohshima","orcid":"https://orcid.org/0000-0003-4073-5688"},"institutions":[{"id":"https://openalex.org/I135598925","display_name":"Kyushu University","ror":"https://ror.org/00p4k0j84","country_code":"JP","type":"education","lineage":["https://openalex.org/I135598925"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Satoshi Ohshima","raw_affiliation_strings":["Research Institute for Information Technology, Kyushu University,Fukuoka,Japan"],"raw_orcid":"https://orcid.org/0000-0003-4073-5688","affiliations":[{"raw_affiliation_string":"Research Institute for Information Technology, Kyushu University,Fukuoka,Japan","institution_ids":["https://openalex.org/I135598925"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042523389","display_name":"Akihiro Ida","orcid":"https://orcid.org/0000-0001-7751-1093"},"institutions":[{"id":"https://openalex.org/I1315852903","display_name":"Japan Agency for Marine-Earth Science and Technology","ror":"https://ror.org/059qg2m13","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1315852903"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Akihiro Ida","raw_affiliation_strings":["Research Institute for Value-Added-Information Generation,Japan Agency for Marine-Earth Science and Technology,Kanagawa,Japan"],"raw_orcid":"https://orcid.org/0000-0001-7751-1093","affiliations":[{"raw_affiliation_string":"Research Institute for Value-Added-Information Generation,Japan Agency for Marine-Earth Science and Technology,Kanagawa,Japan","institution_ids":["https://openalex.org/I1315852903"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009492561","display_name":"Masatoshi Kawai","orcid":"https://orcid.org/0009-0003-1454-202X"},"institutions":[{"id":"https://openalex.org/I4210093896","display_name":"Tohoku University Hospital","ror":"https://ror.org/00kcd6x60","country_code":"JP","type":"healthcare","lineage":["https://openalex.org/I4210093896"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masatoshi Kawai","raw_affiliation_strings":["Tohoku University,Cyberscience Center,Miyagi,Japan"],"raw_orcid":"https://orcid.org/0009-0003-1454-202X","affiliations":[{"raw_affiliation_string":"Tohoku University,Cyberscience Center,Miyagi,Japan","institution_ids":["https://openalex.org/I4210093896"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121649823","display_name":"Takeshi Fukaya","orcid":null},"institutions":[{"id":"https://openalex.org/I159165171","display_name":"Hokkaido Information University","ror":"https://ror.org/00nyxpe17","country_code":"JP","type":"education","lineage":["https://openalex.org/I159165171"]},{"id":"https://openalex.org/I205349734","display_name":"Hokkaido University","ror":"https://ror.org/02e16g702","country_code":"JP","type":"education","lineage":["https://openalex.org/I205349734"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takeshi Fukaya","raw_affiliation_strings":["Hokkaido University,Information Initiative Center,Hokkaido,Japan"],"raw_orcid":"https://orcid.org/0000-0003-1217-6444","affiliations":[{"raw_affiliation_string":"Hokkaido University,Information Initiative Center,Hokkaido,Japan","institution_ids":["https://openalex.org/I159165171","https://openalex.org/I205349734"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025266920","display_name":"Rio Yokota","orcid":null},"institutions":[{"id":"https://openalex.org/I4210125947","display_name":"Japan Research Institute","ror":"https://ror.org/02m5srn05","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210125947"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Rio Yokota","raw_affiliation_strings":["Institute of Integrated Research, Institute of Science Tokyo,Tokyo,Japan"],"raw_orcid":"https://orcid.org/0000-0001-7573-7873","affiliations":[{"raw_affiliation_string":"Institute of Integrated Research, Institute of Science Tokyo,Tokyo,Japan","institution_ids":["https://openalex.org/I4210125947"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5080142917"],"corresponding_institution_ids":["https://openalex.org/I135598925"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.67221366,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"41","last_page":"48"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5153999924659729,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5153999924659729,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.3813999891281128,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.00989999994635582,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5648999810218811},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5040000081062317},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.459199994802475},{"id":"https://openalex.org/keywords/distributed-shared-memory","display_name":"Distributed shared memory","score":0.44780001044273376},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.43130001425743103},{"id":"https://openalex.org/keywords/flat-memory-model","display_name":"Flat memory model","score":0.4230000078678131},{"id":"https://openalex.org/keywords/memory-model","display_name":"Memory model","score":0.4106999933719635},{"id":"https://openalex.org/keywords/uniform-memory-access","display_name":"Uniform memory access","score":0.4032000005245209},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.40130001306533813}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.820900022983551},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5763999819755554},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5648999810218811},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5040000081062317},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.459199994802475},{"id":"https://openalex.org/C39528615","wikidata":"https://www.wikidata.org/wiki/Q1229610","display_name":"Distributed shared memory","level":5,"score":0.44780001044273376},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.43130001425743103},{"id":"https://openalex.org/C57863822","wikidata":"https://www.wikidata.org/wiki/Q905488","display_name":"Flat memory model","level":4,"score":0.4230000078678131},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.4106999933719635},{"id":"https://openalex.org/C51290061","wikidata":"https://www.wikidata.org/wiki/Q1936765","display_name":"Uniform memory access","level":4,"score":0.4032000005245209},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.40130001306533813},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4000000059604645},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.39070001244544983},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.3882000148296356},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.37389999628067017},{"id":"https://openalex.org/C2778241615","wikidata":"https://www.wikidata.org/wiki/Q83303","display_name":"Fortran","level":2,"score":0.3652999997138977},{"id":"https://openalex.org/C171675096","wikidata":"https://www.wikidata.org/wiki/Q1143380","display_name":"Extended memory","level":4,"score":0.36000001430511475},{"id":"https://openalex.org/C153247305","wikidata":"https://www.wikidata.org/wiki/Q835713","display_name":"Memory address","level":3,"score":0.3203999996185303},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.311599999666214},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.29840001463890076},{"id":"https://openalex.org/C74426580","wikidata":"https://www.wikidata.org/wiki/Q719484","display_name":"Memory map","level":3,"score":0.28999999165534973},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.28439998626708984},{"id":"https://openalex.org/C92855701","wikidata":"https://www.wikidata.org/wiki/Q5830907","display_name":"Computer memory","level":3,"score":0.2782000005245209},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2721000015735626},{"id":"https://openalex.org/C53838383","wikidata":"https://www.wikidata.org/wiki/Q541148","display_name":"Conventional memory","level":5,"score":0.26350000500679016},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.26179999113082886},{"id":"https://openalex.org/C76399640","wikidata":"https://www.wikidata.org/wiki/Q189401","display_name":"Virtual memory","level":4,"score":0.25440001487731934},{"id":"https://openalex.org/C189930140","wikidata":"https://www.wikidata.org/wiki/Q1112878","display_name":"CAS latency","level":4,"score":0.25369998812675476}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/mcsoc67473.2025.00017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mcsoc67473.2025.00017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 18th International Symposium on Embedded Multicore/Many-core Systems-on-Chip (MCSoC)","raw_type":"proceedings-article"},{"id":"pmh:oai:catalog.lib.kyushu-u.ac.jp:2324/7420563","is_oa":true,"landing_page_url":"http://hdl.handle.net/2324/7420563","pdf_url":"https://catalog.lib.kyushu-u.ac.jp/opac_download_md/7420563/7420563.pdf","source":{"id":"https://openalex.org/S4306402148","display_name":"QIR (Kyushu University Institutional Repository) (Kyushu University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I135598925","host_organization_name":"Kyushu University","host_organization_lineage":["https://openalex.org/I135598925"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2025 IEEE 18th International Symposium on Embedded Multicore/Many-core Systems-on-Chip (MCSoC)","raw_type":"conference paper"}],"best_oa_location":{"id":"pmh:oai:catalog.lib.kyushu-u.ac.jp:2324/7420563","is_oa":true,"landing_page_url":"http://hdl.handle.net/2324/7420563","pdf_url":"https://catalog.lib.kyushu-u.ac.jp/opac_download_md/7420563/7420563.pdf","source":{"id":"https://openalex.org/S4306402148","display_name":"QIR (Kyushu University Institutional Repository) (Kyushu University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I135598925","host_organization_name":"Kyushu University","host_organization_lineage":["https://openalex.org/I135598925"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2025 IEEE 18th International Symposium on Embedded Multicore/Many-core Systems-on-Chip (MCSoC)","raw_type":"conference paper"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.5701995491981506,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7117704932.pdf","grobid_xml":"https://content.openalex.org/works/W7117704932.grobid-xml"},"referenced_works_count":8,"referenced_works":["https://openalex.org/W2045635967","https://openalex.org/W2062970737","https://openalex.org/W2995553094","https://openalex.org/W3015424312","https://openalex.org/W3015594417","https://openalex.org/W4281290781","https://openalex.org/W4362713195","https://openalex.org/W4401408779"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,86],"comparative":[4],"study":[5],"of":[6,89],"NVIDIA":[7],"Managed":[8,71],"Memory":[9,12,68,72],"and":[10,39,48,101],"Unified":[11,67],"for":[13,78,92,105],"GPU-accelerated":[14],"numerical":[15],"computing":[16],"using":[17,42],"CUDA":[18,29,93],"Fortran.":[19],"We":[20],"focus":[21],"on":[22,46,73],"Block":[23],"Low-Rank":[24],"QR":[25],"factorization":[26],"implemented":[27],"with":[28],"Fortran,":[30],"evaluating":[31],"two":[32],"approaches:":[33],"memory":[34,54,90,107],"control":[35],"via":[36],"compiler":[37],"option":[38],"manual":[40,53],"specification":[41],"variable":[43],"qualifiers.":[44],"Experiments":[45],"GH200":[47],"H100":[49],"systems":[50],"reveal":[51],"that":[52],"placement":[55,108],"reduces":[56],"runtime":[57],"by":[58],"up":[59],"to":[60,64],"68":[61],"%":[62],"compared":[63],"compiler-option-based":[65],"control.":[66],"consistently":[69],"outperforms":[70],"GH200,":[74],"highlighting":[75],"its":[76],"suitability":[77],"integrated":[79],"CPU/GPU":[80],"architectures.":[81],"Our":[82],"contributions":[83],"are:":[84],"(1)":[85],"systematic":[87],"evaluation":[88],"strategies":[91],"Fortran":[94],"applications,":[95],"(2)":[96],"insights":[97],"into":[98],"productivity-performance":[99],"trade-offs,":[100],"(3)":[102],"practical":[103],"guidelines":[104],"auto-tuning":[106],"in":[109],"HPC":[110],"environments.":[111]},"counts_by_year":[],"updated_date":"2026-05-01T08:36:08.643496","created_date":"2025-12-31T00:00:00"}
