{"id":"https://openalex.org/W2114736933","doi":"https://doi.org/10.1109/hpca.2011.5749721","title":"MOPED: Orchestrating interprocess message data on CMPs","display_name":"MOPED: Orchestrating interprocess message data on CMPs","publication_year":2011,"publication_date":"2011-02-01","ids":{"openalex":"https://openalex.org/W2114736933","doi":"https://doi.org/10.1109/hpca.2011.5749721","mag":"2114736933"},"language":"en","primary_location":{"id":"doi:10.1109/hpca.2011.5749721","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2011.5749721","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE 17th International Symposium on High Performance Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112676326","display_name":"Junli Gu","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]},{"id":"https://openalex.org/I4210165005","display_name":"BioElectronics (United States)","ror":"https://ror.org/05mh3a113","country_code":"US","type":"company","lineage":["https://openalex.org/I4210165005"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Junli Gu","raw_affiliation_strings":["Coordinated Science Laboratory, University of Illinois, Urbana-Champaign, USA","Institute of Microelectronics, Tsinghua University, Beijing, IL, USA"],"affiliations":[{"raw_affiliation_string":"Coordinated Science Laboratory, University of Illinois, Urbana-Champaign, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"Institute of Microelectronics, Tsinghua University, Beijing, IL, USA","institution_ids":["https://openalex.org/I4210165005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108619331","display_name":"Steven S. Lumetta","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steven S. Lumetta","raw_affiliation_strings":["Coordinated Science Laboratory, University of Illinois, Urbana-Champaign, USA"],"affiliations":[{"raw_affiliation_string":"Coordinated Science Laboratory, University of Illinois, Urbana-Champaign, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067547869","display_name":"Rakesh Kumar","orcid":"https://orcid.org/0000-0002-3290-2629"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rakesh Kumar","raw_affiliation_strings":["Coordinated Science Laboratory, University of Illinois, Urbana-Champaign, USA"],"affiliations":[{"raw_affiliation_string":"Coordinated Science Laboratory, University of Illinois, Urbana-Champaign, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102093599","display_name":"Yihe Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165005","display_name":"BioElectronics (United States)","ror":"https://ror.org/05mh3a113","country_code":"US","type":"company","lineage":["https://openalex.org/I4210165005"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yihe Sun","raw_affiliation_strings":["Institute of Microelectronics, Tsinghua University, Beijing, IL, USA"],"affiliations":[{"raw_affiliation_string":"Institute of Microelectronics, Tsinghua University, Beijing, IL, USA","institution_ids":["https://openalex.org/I4210165005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5112676326"],"corresponding_institution_ids":["https://openalex.org/I157725225","https://openalex.org/I4210165005"],"apc_list":null,"apc_paid":null,"fwci":0.2577,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.58631802,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"33","issue":null,"first_page":"111","last_page":"120"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9061939716339111},{"id":"https://openalex.org/keywords/cache-coherence","display_name":"Cache coherence","score":0.7250423431396484},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.622663676738739},{"id":"https://openalex.org/keywords/message-passing","display_name":"Message passing","score":0.5551770925521851},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.4838325083255768},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4636225998401642},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4357284903526306},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4292008578777313},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.4258561134338379},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.41073113679885864},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.33912065625190735},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.30821627378463745},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.27319592237472534},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2543034553527832}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9061939716339111},{"id":"https://openalex.org/C141917322","wikidata":"https://www.wikidata.org/wiki/Q1025017","display_name":"Cache coherence","level":5,"score":0.7250423431396484},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.622663676738739},{"id":"https://openalex.org/C854659","wikidata":"https://www.wikidata.org/wiki/Q1859284","display_name":"Message passing","level":2,"score":0.5551770925521851},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.4838325083255768},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4636225998401642},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4357284903526306},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4292008578777313},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.4258561134338379},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.41073113679885864},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.33912065625190735},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.30821627378463745},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.27319592237472534},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2543034553527832},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/hpca.2011.5749721","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2011.5749721","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE 17th International Symposium on High Performance Computer Architecture","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.380.1326","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.380.1326","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://passat.crhc.illinois.edu/rakeshk/hpca11_cam.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320307757","display_name":"Advanced Micro Devices","ror":"https://ror.org/04kd6c783"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1579133219","https://openalex.org/W1974420955","https://openalex.org/W1979450609","https://openalex.org/W1996030720","https://openalex.org/W2002044167","https://openalex.org/W2022792326","https://openalex.org/W2042559279","https://openalex.org/W2088032806","https://openalex.org/W2113167168","https://openalex.org/W2113290275","https://openalex.org/W2126990153","https://openalex.org/W2152609355","https://openalex.org/W2164264749","https://openalex.org/W6634791753","https://openalex.org/W6649236435"],"related_works":["https://openalex.org/W2337418885","https://openalex.org/W4207012101","https://openalex.org/W2133682266","https://openalex.org/W2497617944","https://openalex.org/W2167303720","https://openalex.org/W1563139915","https://openalex.org/W2109715593","https://openalex.org/W2061075966","https://openalex.org/W3147501184","https://openalex.org/W2268996566"],"abstract_inverted_index":{"Future":[0],"CMPs":[1],"will":[2],"combine":[3],"many":[4],"simple":[5],"cores":[6],"with":[7,168],"deep":[8],"cache":[9,14,60,110],"hierarchies.":[10],"With":[11],"more":[12],"cores,":[13],"resources":[15],"per":[16],"core":[17],"are":[18,219],"fewer,":[19],"and":[20,32,58,70,88,112,119,132,150,164,181,208,225],"must":[21,73],"be":[22,53,75],"shared":[23],"carefully":[24],"to":[25,30,55,63,97,115,123,133,172,179,192],"avoid":[26],"poor":[27],"utilization":[28],"due":[29],"conflicts":[31],"pollution.":[33],"Explicit":[34],"motion":[35],"of":[36,200,209],"data":[37,71,126,136],"in":[38,121,137,139],"these":[39,65],"architectures,":[40],"such":[41,103],"as":[42,104],"message":[43,100,117,125,174],"passing,":[44],"can":[45,52],"provide":[46],"hints":[47],"about":[48],"program":[49],"behavior":[50],"that":[51,93],"used":[54],"hide":[56],"latency":[57],"improve":[59],"behavior.":[61],"However,":[62],"make":[64],"models":[66],"attractive,":[67],"synchronization":[68,118],"overhead":[69],"copying":[72],"also":[74],"offloaded":[76],"from":[77],"the":[78,108,153,169,188],"processors.":[79],"In":[80],"this":[81],"paper,":[82],"we":[83],"describe":[84],"a":[85,140,157],"Message":[86],"Orchestration":[87],"Performance":[89],"Enhancement":[90],"Device":[91],"(MOPED)":[92],"provides":[94],"hardware":[95],"mechanisms":[96],"support":[98,116],"state-of-the-art":[99],"passing":[101],"protocols":[102],"MPI.":[105],"MOPED":[106,143,166,186,196],"extends":[107],"per-processor":[109],"controllers":[111,171],"coherence":[113,182],"protocol":[114],"management":[120],"hardware,":[122],"transfer":[124],"efficiently":[127],"without":[128],"intermediate":[129],"buffer":[130],"copies,":[131],"place":[134],"useful":[135],"caches":[138],"timely":[141],"manner.":[142],"thus":[144],"allows":[145],"full":[146],"overlap":[147],"between":[148],"communication":[149,210],"computation":[151],"on":[152,162],"cores.":[154],"We":[155,176],"extended":[156],"16-core":[158],"full-system":[159],"simulator":[160],"based":[161],"Simics":[163],"FeS2.":[165],"interacts":[167],"directory":[170],"orchestrate":[173],"data.":[175],"evaluated":[177],"benefits":[178],"performance":[180],"traffic":[183],"by":[184,206,214,221,226],"integrating":[185],"into":[187],"MPICH":[189],"runtime.":[190],"Relative":[191],"unmodified":[193],"MPI":[194],"execution,":[195],"reduces":[197],"execution":[198],"time":[199],"real":[201],"applications":[202,224],"(NAS":[203],"Parallel":[204],"Benchmarks)":[205],"17-45%":[207],"microbenchmarks":[211],"(Intel's":[212],"IMB)":[213],"76-94%.":[215],"Off-chip":[216],"memory":[217],"misses":[218],"reduced":[220],"43-88%":[222],"for":[223,228],"75-100%":[227],"microbenchmarks.":[229]},"counts_by_year":[{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
