{"id":"https://openalex.org/W2794160862","doi":"https://doi.org/10.1145/3177960","title":"Enabling SIMT Execution Model on Homogeneous Multi-Core System","display_name":"Enabling SIMT Execution Model on Homogeneous Multi-Core System","publication_year":2018,"publication_date":"2018-03-22","ids":{"openalex":"https://openalex.org/W2794160862","doi":"https://doi.org/10.1145/3177960","mag":"2794160862"},"language":"en","primary_location":{"id":"doi:10.1145/3177960","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3177960","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3177960&type=pdf","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=3177960&type=pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103184352","display_name":"Kuan\u2010Chung Chen","orcid":"https://orcid.org/0000-0002-9699-2993"},"institutions":[{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Kuan-Chung Chen","raw_affiliation_strings":["National Cheng Kung University, Taiwan"],"raw_orcid":"https://orcid.org/0000-0002-9699-2993","affiliations":[{"raw_affiliation_string":"National Cheng Kung University, Taiwan","institution_ids":["https://openalex.org/I91807558"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111582905","display_name":"Chung\u2010Ho Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I91807558","display_name":"National Cheng Kung University","ror":"https://ror.org/01b8kcc49","country_code":"TW","type":"education","lineage":["https://openalex.org/I91807558"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chung-Ho Chen","raw_affiliation_strings":["National Cheng Kung University, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Cheng Kung University, Taiwan","institution_ids":["https://openalex.org/I91807558"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9502,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.74607753,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"15","issue":"1","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9023959636688232},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8244277238845825},{"id":"https://openalex.org/keywords/mimd","display_name":"MIMD","score":0.7564060688018799},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.7440812587738037},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.7013716101646423},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.6465473771095276},{"id":"https://openalex.org/keywords/execution-model","display_name":"Execution model","score":0.5323774218559265},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.5203908681869507},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.4979526996612549},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.4700792133808136},{"id":"https://openalex.org/keywords/simultaneous-multithreading","display_name":"Simultaneous multithreading","score":0.4216018319129944},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18641206622123718},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.12833982706069946}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9023959636688232},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8244277238845825},{"id":"https://openalex.org/C21032095","wikidata":"https://www.wikidata.org/wiki/Q1149237","display_name":"MIMD","level":2,"score":0.7564060688018799},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.7440812587738037},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.7013716101646423},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.6465473771095276},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.5323774218559265},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.5203908681869507},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.4979526996612549},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.4700792133808136},{"id":"https://openalex.org/C85717602","wikidata":"https://www.wikidata.org/wiki/Q82178","display_name":"Simultaneous multithreading","level":4,"score":0.4216018319129944},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18641206622123718},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.12833982706069946}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3177960","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3177960","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3177960&type=pdf","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3177960","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3177960","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=3177960&type=pdf","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7220481793","display_name":null,"funder_award_id":"MOST 103-2221-E-006-266-MY3","funder_id":"https://openalex.org/F4320322795","funder_display_name":"Ministry of Science and Technology, Taiwan"}],"funders":[{"id":"https://openalex.org/F4320322795","display_name":"Ministry of Science and Technology, Taiwan","ror":"https://ror.org/02kv4zf79"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2794160862.pdf","grobid_xml":"https://content.openalex.org/works/W2794160862.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W348433680","https://openalex.org/W1492601037","https://openalex.org/W1502558230","https://openalex.org/W1552624537","https://openalex.org/W1555915743","https://openalex.org/W1968775993","https://openalex.org/W1971997351","https://openalex.org/W1973538724","https://openalex.org/W1991592471","https://openalex.org/W1997432558","https://openalex.org/W1998886328","https://openalex.org/W2000335122","https://openalex.org/W2010936531","https://openalex.org/W2026418498","https://openalex.org/W2028914809","https://openalex.org/W2031781358","https://openalex.org/W2040566380","https://openalex.org/W2047060659","https://openalex.org/W2052787948","https://openalex.org/W2066862978","https://openalex.org/W2080592089","https://openalex.org/W2081373884","https://openalex.org/W2086698285","https://openalex.org/W2090584832","https://openalex.org/W2095314640","https://openalex.org/W2103742924","https://openalex.org/W2106562406","https://openalex.org/W2110954112","https://openalex.org/W2120230074","https://openalex.org/W2124556751","https://openalex.org/W2125979435","https://openalex.org/W2135947393","https://openalex.org/W2153185479","https://openalex.org/W2155568054","https://openalex.org/W2156831150","https://openalex.org/W2170997622","https://openalex.org/W2249344705","https://openalex.org/W2273348289","https://openalex.org/W2409690919","https://openalex.org/W2509821985","https://openalex.org/W2619705232","https://openalex.org/W3103903655"],"related_works":["https://openalex.org/W2010970156","https://openalex.org/W2293060199","https://openalex.org/W2074226157","https://openalex.org/W2075849238","https://openalex.org/W2082701182","https://openalex.org/W2544870670","https://openalex.org/W4302771143","https://openalex.org/W3036939780","https://openalex.org/W1996932109","https://openalex.org/W2794160862"],"abstract_inverted_index":{"Single-instruction":[0],"multiple-thread":[1],"(SIMT)":[2],"machine":[3],"emerges":[4],"as":[5],"a":[6,78],"primary":[7],"computing":[8],"device":[9],"in":[10,37,52,97,132],"high-perfor-mance":[11],"computing,":[12],"since":[13],"the":[14,26,44,72,89,93,124,143,157,171,176,178,185,193,205,225],"SIMT":[15,27,54,73,144,179,197,226],"execution":[16,28,55,62,74],"paradigm":[17],"can":[18],"exploit":[19,221],"data-level":[20,222],"parallelism":[21],"effectively.":[22],"This":[23],"article":[24],"explores":[25],"potential":[29],"on":[30,57,82,136,148,175,228],"homogeneous":[31,229],"multi-core":[32,45,58,85,230],"processors,":[33],"which":[34],"generally":[35],"run":[36],"multiple-instruction":[38],"multiple-data":[39],"(MIMD)":[40],"mode":[41],"when":[42],"utilizing":[43],"resources.":[46],"We":[47],"address":[48],"three":[49],"architecture":[50],"issues":[51],"enabling":[53,224],"model":[56,180,198,207,227],"processor,":[59],"including":[60],"multithreading":[61,80],"model,":[63,75],"kernel":[64,94],"thread":[65,69,95],"context":[66],"placement,":[67],"and":[68,121,141,151,188],"divergence.":[70],"For":[71,106],"we":[76],"propose":[77],"fine-grained":[79],"mechanism":[81,115],"an":[83,109],"ARM-based":[84],"system.":[86],"Each":[87],"of":[88],"processor":[90,167,202,212],"cores":[91,203,213],"stores":[92],"contexts":[96],"its":[98],"L1":[99],"data":[100],"cache":[101],"for":[102,160,163],"per-cycle":[103],"thread-switching":[104],"requirement.":[105],"divergence-intensive":[107],"kernels,":[108,177],"Inner":[110],"Conditional":[111],"Statement":[112],"First":[113],"(ICS-First)":[114],"helps":[116],"early":[117],"re-convergence":[118],"to":[119,146,153,220],"occur":[120],"significantly":[122],"improves":[123],"performance.":[125],"The":[126,196,216],"experiment":[127],"results":[128,217],"show":[129,218],"that":[130,208],"effectiveness":[131],"data-parallel":[133],"processing":[134],"reduces":[135],"average":[137,149],"36%":[138],"dynamic":[139],"instructions,":[140],"boosts":[142],"executions":[145],"achieve":[147],"1.52\u00d7":[150],"up":[152],"5\u00d7":[154],"speedups":[155],"over":[156,192],"MIMD":[158,194,206],"counterpart":[159],"OpenCL":[161],"benchmarks":[162],"single":[164],"issue":[165],"in-order":[166,200],"cores.":[168],"By":[169],"using":[170,199],"explicit":[172],"vectorization":[173],"optimization":[174],"gains":[181],"further":[182],"benefits":[183],"from":[184],"SIMD":[186],"extension":[187],"achieves":[189],"1.71\u00d7":[190],"speedup":[191],"approach.":[195],"superscalar":[201,210],"outperforms":[204],"uses":[209],"out-of-order":[211],"by":[214],"40%.":[215],"that,":[219],"parallelism,":[223],"processors":[231],"is":[232],"important.":[233]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
