{"id":"https://openalex.org/W2081597309","doi":"https://doi.org/10.1145/1122971.1122987","title":"Hardware profile-guided automatic page placement for ccNUMA systems","display_name":"Hardware profile-guided automatic page placement for ccNUMA systems","publication_year":2006,"publication_date":"2006-03-29","ids":{"openalex":"https://openalex.org/W2081597309","doi":"https://doi.org/10.1145/1122971.1122987","mag":"2081597309"},"language":"en","primary_location":{"id":"doi:10.1145/1122971.1122987","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1122971.1122987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the eleventh ACM SIGPLAN symposium on Principles and practice of parallel programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050458084","display_name":"Jaydeep Marathe","orcid":null},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jaydeep Marathe","raw_affiliation_strings":["North Carolina State University, Raleigh, NC","North Carolina State University, Raleigh, NC;"],"affiliations":[{"raw_affiliation_string":"North Carolina State University, Raleigh, NC","institution_ids":["https://openalex.org/I137902535"]},{"raw_affiliation_string":"North Carolina State University, Raleigh, NC;","institution_ids":["https://openalex.org/I137902535"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107279953","display_name":"Frank Mueller","orcid":null},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Frank Mueller","raw_affiliation_strings":["North Carolina State University, Raleigh, NC","North Carolina State University, Raleigh, NC;"],"affiliations":[{"raw_affiliation_string":"North Carolina State University, Raleigh, NC","institution_ids":["https://openalex.org/I137902535"]},{"raw_affiliation_string":"North Carolina State University, Raleigh, NC;","institution_ids":["https://openalex.org/I137902535"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5050458084"],"corresponding_institution_ids":["https://openalex.org/I137902535"],"apc_list":null,"apc_paid":null,"fwci":5.5748,"has_fulltext":false,"cited_by_count":76,"citation_normalized_percentile":{"value":0.96260989,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"90","last_page":"99"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8673707246780396},{"id":"https://openalex.org/keywords/virtual-memory","display_name":"Virtual memory","score":0.6865357160568237},{"id":"https://openalex.org/keywords/page-fault","display_name":"Page fault","score":0.6534865498542786},{"id":"https://openalex.org/keywords/page","display_name":"Page","score":0.5624133348464966},{"id":"https://openalex.org/keywords/cache-coherence","display_name":"Cache coherence","score":0.553800106048584},{"id":"https://openalex.org/keywords/demand-paging","display_name":"Demand paging","score":0.5489684343338013},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.5361243486404419},{"id":"https://openalex.org/keywords/address-space","display_name":"Address space","score":0.5128470063209534},{"id":"https://openalex.org/keywords/physical-address","display_name":"Physical address","score":0.48578929901123047},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.48219195008277893},{"id":"https://openalex.org/keywords/interleaved-memory","display_name":"Interleaved memory","score":0.46373501420021057},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.43753504753112793},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.3688265383243561},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.3189426064491272},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.29255327582359314},{"id":"https://openalex.org/keywords/overlay","display_name":"Overlay","score":0.28132927417755127},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.278544545173645},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.12787386775016785}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8673707246780396},{"id":"https://openalex.org/C76399640","wikidata":"https://www.wikidata.org/wiki/Q189401","display_name":"Virtual memory","level":4,"score":0.6865357160568237},{"id":"https://openalex.org/C193343404","wikidata":"https://www.wikidata.org/wiki/Q1928607","display_name":"Page fault","level":5,"score":0.6534865498542786},{"id":"https://openalex.org/C33925742","wikidata":"https://www.wikidata.org/wiki/Q361698","display_name":"Page","level":2,"score":0.5624133348464966},{"id":"https://openalex.org/C141917322","wikidata":"https://www.wikidata.org/wiki/Q1025017","display_name":"Cache coherence","level":5,"score":0.553800106048584},{"id":"https://openalex.org/C188873839","wikidata":"https://www.wikidata.org/wiki/Q5255045","display_name":"Demand paging","level":5,"score":0.5489684343338013},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.5361243486404419},{"id":"https://openalex.org/C144240696","wikidata":"https://www.wikidata.org/wiki/Q367204","display_name":"Address space","level":2,"score":0.5128470063209534},{"id":"https://openalex.org/C41036726","wikidata":"https://www.wikidata.org/wiki/Q844824","display_name":"Physical address","level":3,"score":0.48578929901123047},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.48219195008277893},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.46373501420021057},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.43753504753112793},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.3688265383243561},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.3189426064491272},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.29255327582359314},{"id":"https://openalex.org/C136085584","wikidata":"https://www.wikidata.org/wiki/Q910289","display_name":"Overlay","level":2,"score":0.28132927417755127},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.278544545173645},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.12787386775016785},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1122971.1122987","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1122971.1122987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the eleventh ACM SIGPLAN symposium on Principles and practice of parallel programming","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W155912745","https://openalex.org/W1993223688","https://openalex.org/W2112121929","https://openalex.org/W2113729313","https://openalex.org/W2115617161","https://openalex.org/W2140455011","https://openalex.org/W2153644244","https://openalex.org/W2157632164"],"related_works":["https://openalex.org/W2017276153","https://openalex.org/W4241723377","https://openalex.org/W2059368477","https://openalex.org/W1437641643","https://openalex.org/W2160091740","https://openalex.org/W2078640694","https://openalex.org/W3196094483","https://openalex.org/W2038379180","https://openalex.org/W2521131576","https://openalex.org/W2081597309"],"abstract_inverted_index":{"Cache":[0],"coherent":[1],"non-uniform":[2],"memory":[3,60,76,93,184,221],"architectures":[4],"(ccNUMA)":[5],"constitute":[6],"an":[7,104,180,304],"important":[8],"class":[9],"of":[10,26,32,36,42,120,174,183,229,243,259,266,310,322,334,346],"high-performance":[11],"computing":[12],"plat-forms.":[13],"Contemporary":[14],"ccNUMA":[15],"systems,":[16],"such":[17,129],"as":[18,81],"the":[19,48,51,58,78,82,113,195,199,233,241,252,264,267,270,325,332,339],"SGI":[20],"Altix,":[21],"have":[22],"a":[23,33,39,95,130,145,227,283,318,343,347],"large":[24],"number":[25,35,119,258],"nodes,":[27,64],"where":[28],"each":[29],"node":[30,80,196],"consists":[31],"small":[34],"processors":[37,46,161],"and":[38,65,213,235,269],"fixed":[40],"amount":[41],"physical":[43,59,75],"memory.":[44,125],"All":[45],"in":[47,86,137,207,261,273],"system":[49,362],"access":[50,124,165],"same":[52,79],"global":[53],"virtual":[54],"address":[55],"space":[56],"but":[57,218],"is":[61,67,188,201,210,338],"distributed":[62],"across":[63],"coherence":[66],"maintained":[68],"using":[69],"hardware":[70,246],"mechanisms.":[71],"Accesses":[72],"to":[73,91,112,123,134,177,190,197,251,256,303],"local":[74],"(on":[77,94],"requesting":[83,114],"processor)":[84],"results":[85],"lower":[87],"latencies":[88],"than":[89,289,312],"accesses":[90],"remote":[92],"different":[96,245],"node).":[97],"Since":[98],"many":[99],"scientific":[100],"programs":[101],"are":[102],"memory-bound,":[103],"intelligent":[105],"page-placement":[106],"policy":[107,131],"that":[108,128,162,166,279,294,356],"allocates":[109,158],"pages":[110,159],"closer":[111],"processor":[115],"can":[116,132,297],"significantly":[117],"reduce":[118],"cycles":[121],"required":[122],"We":[126,239,277],"show":[127,278,293],"lead":[133],"significant":[135],"savings":[136,272],"wall-clock":[138,274,306],"execution":[139,275,307],"time.In":[140],"this":[141,337],"paper,":[142],"we":[143],"introduce":[144],"novel":[146],"hardware-assisted":[147],"page":[148,192,200,287,300,353],"placement":[149,156,288,354],"scheme":[150,157,169,355],"based":[151],"on":[152,342],"automated":[153],"profiling.":[154],"The":[155,168],"near":[160],"most":[163],"frequently":[164],"page.":[167],"leverages":[170],"performance":[171],"monitoring":[172],"capabilities":[173],"contemporary":[175],"microprocessors":[176],"efficiently":[178,298],"extract":[179],"approximate":[181],"trace":[182],"accesses.":[185],"This":[186],"information":[187],"used":[189],"decide":[191],"affinity,":[193],"i.e.,":[194],"which":[198],"bound.":[202],"Our":[203],"method":[204,296],"operates":[205],"entirely":[206],"user":[208,349],"space,":[209],"widely":[211],"automated,":[212],"handles":[214],"not":[215],"only":[216],"static":[217],"also":[219],"dynamic":[220],"allocation.We":[222],"evaluate":[223],"our":[224,295,315,335],"framework":[225],"with":[226,249,317],"set":[228],"multi-threaded":[230],"benchmarks":[231],"from":[232],"NAS":[234],"SPEC":[236],"OpenMP":[237],"suites.":[238],"investigate":[240],"use":[242],"two":[244],"profile":[247,268],"sources":[248],"respect":[250],"cost":[253],"(e.g.,":[254],"time":[255,308],"trace,":[257],"records":[260],"profile)":[262],"vs.":[263],"accuracy":[265],"corresponding":[271],"time.":[276,330],"long-latency":[280],"loads":[281],"provide":[282],"better":[284],"indicator":[285],"for":[286,314],"TLB":[290],"misses.Our":[291],"experiments":[292],"improve":[299],"placement,":[301],"leading":[302],"average":[305],"saving":[309],"more":[311],"20%":[313],"benchmarks,":[316],"one-time":[319],"profiling":[320],"overhead":[321],"2.7%":[323],"over":[324],"overall":[326],"original":[327],"program":[328],"wallclock":[329],"To":[331],"best":[333],"knowledge,":[336],"first":[340],"evaluation":[341],"real":[344],"machine":[345],"completely":[348],"mode":[350],"interrupt-driven":[351],"profile-guided":[352],"requires":[357],"no":[358],"special":[359],"compiler,":[360],"operating":[361],"or":[363],"network":[364],"interconnect":[365],"support.":[366]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":10},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
