{"id":"https://openalex.org/W3138768204","doi":"https://doi.org/10.1109/pact.2013.6618821","title":"McRouter: multicast within a router for high performance network-on-chips","display_name":"McRouter: multicast within a router for high performance network-on-chips","publication_year":2013,"publication_date":"2013-10-01","ids":{"openalex":"https://openalex.org/W3138768204","doi":"https://doi.org/10.1109/pact.2013.6618821","mag":"3138768204"},"language":"en","primary_location":{"id":"doi:10.1109/pact.2013.6618821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact.2013.6618821","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065946847","display_name":"Janghaeng Lee","orcid":"https://orcid.org/0009-0005-1915-6077"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Janghaeng Lee","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, University of Michigan, Ann Arbor, MI, USA"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058642630","display_name":"Mehrzad Samadi","orcid":"https://orcid.org/0000-0002-3581-1255"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mehrzad Samadi","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, University of Michigan, Ann Arbor, MI, USA"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086980677","display_name":"Yongjun Park","orcid":"https://orcid.org/0000-0003-3725-0380"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yongjun Park","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, University of Michigan, Ann Arbor, MI, USA"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002075773","display_name":"Scott Mahlke","orcid":"https://orcid.org/0000-0002-0438-0616"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Scott Mahlke","raw_affiliation_strings":["Advanced Computer Architecture Laboratory, University of Michigan, Ann Arbor, MI, USA"],"affiliations":[{"raw_affiliation_string":"Advanced Computer Architecture Laboratory, University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5065946847"],"corresponding_institution_ids":["https://openalex.org/I27837315"],"apc_list":null,"apc_paid":null,"fwci":0.6304,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.73350594,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"319","last_page":"330"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8901851177215576},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6790404915809631},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6635339856147766},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6162779331207275},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5749127268791199},{"id":"https://openalex.org/keywords/programmer","display_name":"Programmer","score":0.4937759339809418},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.473949670791626},{"id":"https://openalex.org/keywords/central-processing-unit","display_name":"Central processing unit","score":0.4641270637512207},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2989698648452759}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8901851177215576},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6790404915809631},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6635339856147766},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6162779331207275},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5749127268791199},{"id":"https://openalex.org/C2778514511","wikidata":"https://www.wikidata.org/wiki/Q1374194","display_name":"Programmer","level":2,"score":0.4937759339809418},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.473949670791626},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.4641270637512207},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2989698648452759},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/pact.2013.6618821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/pact.2013.6618821","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1502558230","https://openalex.org/W2011039300","https://openalex.org/W2024122052","https://openalex.org/W2028914809","https://openalex.org/W2109426995","https://openalex.org/W2109515201","https://openalex.org/W2112340065","https://openalex.org/W2128046183","https://openalex.org/W2140375692","https://openalex.org/W2150476673","https://openalex.org/W2159456929","https://openalex.org/W2159481344","https://openalex.org/W2160428323","https://openalex.org/W2167101788","https://openalex.org/W3146774908","https://openalex.org/W3149591378","https://openalex.org/W4236137412","https://openalex.org/W4246166885","https://openalex.org/W6630033071"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2013643406","https://openalex.org/W3012895752","https://openalex.org/W2473478803","https://openalex.org/W2729363167","https://openalex.org/W2060611139"],"abstract_inverted_index":{"Heterogeneous":[0],"computing":[1],"on":[2,85,93,217],"CPUs":[3,79,129],"and":[4,68,130,164,196,224],"GPUs":[5,84,200,227],"has":[6,60],"traditionally":[7],"used":[8],"fixed":[9,95],"roles":[10,96],"for":[11,136,235],"each":[12],"device:":[13],"the":[14,30,38,58,64,94,107,145,149,161,167,184,188,197],"GPU":[15],"handles":[16,32,187],"data":[17,42,193],"parallel":[18],"work":[19,47,91],"by":[20,176],"taking":[21],"advantage":[22],"of":[23,27,74,121,155,191,215,238],"its":[24],"massive":[25],"number":[26],"cores":[28],"while":[29,144],"CPU":[31,223],"non":[33],"data-parallel":[34,124,140],"work,":[35],"such":[36],"as":[37,54],"sequential":[39],"code":[40],"or":[41],"transfer":[43,194],"management.":[44],"Unfortunately,":[45],"this":[46,103],"distribution":[48],"can":[49],"be":[50,98],"a":[51,71,99,114,122,138,218,230,236],"poor":[52,100],"solution":[53],"it":[55],"under":[56],"utilizes":[57],"CPU,":[59],"difficulty":[61],"generalizing":[62],"beyond":[63],"single":[65,108,123,139],"CPU-GPU":[66],"combination,":[67],"may":[69,97],"waste":[70],"large":[72],"fraction":[73],"time":[75],"transferring":[76],"data.":[77],"Further,":[78],"are":[80],"performance":[81,174,198],"competitive":[82],"with":[83,202,220],"many":[86],"workloads,":[87,163],"thus":[88],"simply":[89],"partitioning":[90],"based":[92],"choice.":[101],"In":[102],"paper,":[104],"we":[105],"present":[106],"kernel":[109,125,141],"multiple":[110,127],"devices":[111],"(SKMD)":[112],"system,":[113],"framework":[115],"that":[116],"transparently":[117],"orchestrates":[118],"collaborative":[119],"execution":[120,233],"across":[126,151],"asymmetric":[128,226],"GPUs.":[131],"The":[132,171],"programmer":[133],"is":[134,173],"responsible":[135],"developing":[137],"in":[142],"OpenCL,":[143],"system":[146,219],"automatically":[147],"partitions":[148],"workload":[150],"an":[152,212],"arbitrary":[153],"set":[154,237],"devices,":[156],"generates":[157],"kernels":[158],"to":[159,182,204,229],"execute":[160,183],"partial":[162,168],"efficiently":[165],"merges":[166],"outputs":[169],"together.":[170],"goal":[172],"improvement":[175],"maximally":[177],"utilizing":[178],"all":[179],"available":[180],"resources":[181],"kernel.":[185],"SKMD":[186,210],"difficult":[189],"challenges":[190],"exposed":[192],"costs":[195],"variations":[199],"have":[201],"respect":[203],"input":[205],"size.":[206],"On":[207],"real":[208],"hardware,":[209],"achieves":[211],"average":[213],"speedup":[214],"29%":[216],"one":[221],"multicore":[222],"two":[225],"compared":[228],"fastest":[231],"device":[232],"strategy":[234],"popular":[239],"OpenCL":[240],"kernels.":[241]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
