{"id":"https://openalex.org/W2134452263","doi":"https://doi.org/10.1109/ipdps.2008.4536349","title":"Using hardware multithreading to overcome broadcast/reduction latency in an associative SIMD processor","display_name":"Using hardware multithreading to overcome broadcast/reduction latency in an associative SIMD processor","publication_year":2008,"publication_date":"2008-04-01","ids":{"openalex":"https://openalex.org/W2134452263","doi":"https://doi.org/10.1109/ipdps.2008.4536349","mag":"2134452263"},"language":"en","primary_location":{"id":"doi:10.1109/ipdps.2008.4536349","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2008.4536349","pdf_url":null,"source":{"id":"https://openalex.org/S4210174069","display_name":"Proceedings - IEEE International Parallel and Distributed Processing Symposium","issn_l":"1530-2075","issn":["1530-2075"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE International Symposium on Parallel and Distributed Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023755752","display_name":"Kevin Schaffer","orcid":null},"institutions":[{"id":"https://openalex.org/I149910238","display_name":"Kent State University","ror":"https://ror.org/049pfb863","country_code":"US","type":"education","lineage":["https://openalex.org/I149910238"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kevin Schaffer","raw_affiliation_strings":["Department of Computer Science, Kent University, Kent, OH, USA","Dept. of Comput. Sci., Kent State Univ., Kent, OH"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Kent University, Kent, OH, USA","institution_ids":["https://openalex.org/I149910238"]},{"raw_affiliation_string":"Dept. of Comput. Sci., Kent State Univ., Kent, OH","institution_ids":["https://openalex.org/I149910238"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017764387","display_name":"Robert A. Walker","orcid":"https://orcid.org/0000-0002-0754-6298"},"institutions":[{"id":"https://openalex.org/I149910238","display_name":"Kent State University","ror":"https://ror.org/049pfb863","country_code":"US","type":"education","lineage":["https://openalex.org/I149910238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Robert A. Walker","raw_affiliation_strings":["Department of Computer Science, Kent University, Kent, OH, USA","Dept. of Comput. Sci., Kent State Univ., Kent, OH"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Kent University, Kent, OH, USA","institution_ids":["https://openalex.org/I149910238"]},{"raw_affiliation_string":"Dept. of Comput. Sci., Kent State Univ., Kent, OH","institution_ids":["https://openalex.org/I149910238"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5023755752"],"corresponding_institution_ids":["https://openalex.org/I149910238"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.11444375,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8403123617172241},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.7893089056015015},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.7553237676620483},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.668332576751709},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.6635761260986328},{"id":"https://openalex.org/keywords/simultaneous-multithreading","display_name":"Simultaneous multithreading","score":0.6072062849998474},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5473827719688416},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5155825018882751},{"id":"https://openalex.org/keywords/associative-property","display_name":"Associative property","score":0.4526025652885437},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3962116837501526},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2706170678138733},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.061847418546676636}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8403123617172241},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.7893089056015015},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.7553237676620483},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.668332576751709},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.6635761260986328},{"id":"https://openalex.org/C85717602","wikidata":"https://www.wikidata.org/wiki/Q82178","display_name":"Simultaneous multithreading","level":4,"score":0.6072062849998474},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5473827719688416},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5155825018882751},{"id":"https://openalex.org/C159423971","wikidata":"https://www.wikidata.org/wiki/Q177251","display_name":"Associative property","level":2,"score":0.4526025652885437},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3962116837501526},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2706170678138733},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.061847418546676636},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipdps.2008.4536349","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2008.4536349","pdf_url":null,"source":{"id":"https://openalex.org/S4210174069","display_name":"Proceedings - IEEE International Parallel and Distributed Processing Symposium","issn_l":"1530-2075","issn":["1530-2075"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE International Symposium on Parallel and Distributed Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1526588783","https://openalex.org/W1654743423","https://openalex.org/W2026603021","https://openalex.org/W2120230074","https://openalex.org/W2127812937","https://openalex.org/W2132392700","https://openalex.org/W2135601680","https://openalex.org/W2163715620","https://openalex.org/W3142147837","https://openalex.org/W4252934402","https://openalex.org/W6631347860","https://openalex.org/W6679116320","https://openalex.org/W6679758096"],"related_works":["https://openalex.org/W2115561485","https://openalex.org/W1985089255","https://openalex.org/W2153202644","https://openalex.org/W2380961080","https://openalex.org/W2010970156","https://openalex.org/W4235861380","https://openalex.org/W2106625514","https://openalex.org/W2726029565","https://openalex.org/W4248655967","https://openalex.org/W1867214769"],"abstract_inverted_index":{"The":[0],"latency":[1],"of":[2,12,26,54,80,86,136,139],"broadcast/reduction":[3,45],"operations":[4,128],"has":[5],"a":[6,33,111],"significant":[7],"impact":[8],"on":[9],"the":[10,44,55,84,92,98,137],"performance":[11,68],"SIMD":[13,36],"processors.":[14],"This":[15],"is":[16,64,94,129],"especially":[17],"true":[18],"for":[19,74],"associative":[20,35,59],"programs,":[21],"which":[22],"make":[23],"extensive":[24],"use":[25],"global":[27],"search":[28],"operations.":[29],"Previously,":[30],"we":[31,50],"developed":[32],"prototype":[34],"processor":[37,56],"that":[38,61,110,114,118],"uses":[39],"hardware":[40,62,93],"multithreading":[41,63],"to":[42,66,122,131],"overcome":[43],"latency.":[46],"In":[47],"this":[48],"paper":[49],"show,":[51],"through":[52],"simulations":[53],"running":[57],"an":[58],"program,":[60],"able":[65,130],"improve":[67],"by":[69,91],"increasing":[70],"system":[71,133],"utilization,":[72],"even":[73],"processors":[75],"with":[76],"hundreds":[77],"or":[78,125],"thousands":[79],"processing":[81],"elements.":[82],"However,":[83],"choice":[85],"thread":[87,105,112,126],"scheduling":[88,106],"policy":[89],"used":[90],"critical":[95],"in":[96],"determining":[97],"actual":[99],"utilization":[100,134],"achieved.":[101],"We":[102],"consider":[103],"three":[104],"policies":[107],"and":[108],"show":[109],"scheduler":[113],"avoids":[115],"issuing":[116],"threads":[117],"will":[119],"stall":[120],"due":[121],"pipeline":[123],"dependencies":[124],"synchronization":[127],"maintain":[132],"independent":[135],"number":[138],"threads.":[140]},"counts_by_year":[{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
