{"id":"https://openalex.org/W2074228071","doi":"https://doi.org/10.1145/1187976.1187978","title":"Analysis of cache-coherence bottlenecks with hybrid hardware/software techniques","display_name":"Analysis of cache-coherence bottlenecks with hybrid hardware/software techniques","publication_year":2006,"publication_date":"2006-12-01","ids":{"openalex":"https://openalex.org/W2074228071","doi":"https://doi.org/10.1145/1187976.1187978","mag":"2074228071"},"language":"en","primary_location":{"id":"doi:10.1145/1187976.1187978","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1187976.1187978","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1187976.1187978","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/1187976.1187978","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050458084","display_name":"Jaydeep Marathe","orcid":null},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jaydeep Marathe","raw_affiliation_strings":["North Carolina State University, Raleigh, NC","North Carolina State University, Raleigh, NC;"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"North Carolina State University, Raleigh, NC","institution_ids":["https://openalex.org/I137902535"]},{"raw_affiliation_string":"North Carolina State University, Raleigh, NC;","institution_ids":["https://openalex.org/I137902535"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107279953","display_name":"Frank Mueller","orcid":null},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Frank Mueller","raw_affiliation_strings":["North Carolina State University, Raleigh, NC","North Carolina State University, Raleigh, NC;"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"North Carolina State University, Raleigh, NC","institution_ids":["https://openalex.org/I137902535"]},{"raw_affiliation_string":"North Carolina State University, Raleigh, NC;","institution_ids":["https://openalex.org/I137902535"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058719424","display_name":"Bronis R. de Supinski","orcid":"https://orcid.org/0000-0002-0339-1006"},"institutions":[{"id":"https://openalex.org/I1282311441","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282311441","https://openalex.org/I1330989302","https://openalex.org/I198811213","https://openalex.org/I4210138311"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bronis R. de Supinski","raw_affiliation_strings":["Lawrence Livermore National Laboratory, Livermore, CA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lawrence Livermore National Laboratory, Livermore, CA","institution_ids":["https://openalex.org/I1282311441"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5050458084"],"corresponding_institution_ids":["https://openalex.org/I137902535"],"apc_list":null,"apc_paid":null,"fwci":0.2844,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.55537459,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"3","issue":"4","first_page":"390","last_page":"423"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10472","display_name":"Semiconductor materials and devices","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8805018067359924},{"id":"https://openalex.org/keywords/tracing","display_name":"Tracing","score":0.5987831354141235},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5929773449897766},{"id":"https://openalex.org/keywords/cache-coherence","display_name":"Cache coherence","score":0.5438001155853271},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4775213301181793},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.46657595038414},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.43429189920425415},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.41402679681777954},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3895680904388428},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.3802753984928131},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.36392873525619507},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.3593432903289795},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.2515283226966858},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.21806302666664124},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.21401607990264893}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8805018067359924},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.5987831354141235},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5929773449897766},{"id":"https://openalex.org/C141917322","wikidata":"https://www.wikidata.org/wiki/Q1025017","display_name":"Cache coherence","level":5,"score":0.5438001155853271},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4775213301181793},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.46657595038414},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.43429189920425415},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.41402679681777954},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3895680904388428},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3802753984928131},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.36392873525619507},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3593432903289795},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.2515283226966858},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.21806302666664124},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.21401607990264893},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1187976.1187978","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1187976.1187978","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1187976.1187978","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.71.774","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.71.774","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://moss.csc.ncsu.edu/~mueller/ftp/pub/mueller/papers/taco06.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/1187976.1187978","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1187976.1187978","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1187976.1187978","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G107428653","display_name":"CAREER:Exploiting Binary Rewriting to Analyze and Alleviate Memory Bottlenecks for Scientific Applications","funder_award_id":"0237570","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1702950523","display_name":null,"funder_award_id":"W-7405","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G3177268909","display_name":null,"funder_award_id":"-7405-Eng-48","funder_id":"https://openalex.org/F4320338286","funder_display_name":"Lawrence Livermore National Laboratory"},{"id":"https://openalex.org/G439381313","display_name":"NGS:  Collaborative Research: Cross-Platform Performance Estimation for Parallel Applications","funder_award_id":"0406305","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6234810835","display_name":"Collaborative Research: Effective Detection and Alleviation of Scalability Problems","funder_award_id":"0429653","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6671297155","display_name":null,"funder_award_id":"CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7117095313","display_name":null,"funder_award_id":"7405-Eng-48","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"},{"id":"https://openalex.org/G8882585095","display_name":null,"funder_award_id":"W-7405-Eng-48","funder_id":"https://openalex.org/F4320338286","funder_display_name":"Lawrence Livermore National Laboratory"},{"id":"https://openalex.org/G8888111501","display_name":"Runtime/Operating System Synergy to Exploit Simultaneous Multithreading","funder_award_id":"0410203","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320338286","display_name":"Lawrence Livermore National Laboratory","ror":"https://ror.org/041nk4h53"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2074228071.pdf","grobid_xml":"https://content.openalex.org/works/W2074228071.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W106099067","https://openalex.org/W1514171102","https://openalex.org/W1560342753","https://openalex.org/W1569032152","https://openalex.org/W1583642359","https://openalex.org/W1804921254","https://openalex.org/W1977263182","https://openalex.org/W1980257390","https://openalex.org/W2011271680","https://openalex.org/W2035330754","https://openalex.org/W2041330544","https://openalex.org/W2080329299","https://openalex.org/W2086890464","https://openalex.org/W2100928955","https://openalex.org/W2104480547","https://openalex.org/W2104916267","https://openalex.org/W2105032049","https://openalex.org/W2106961628","https://openalex.org/W2112121929","https://openalex.org/W2113267301","https://openalex.org/W2113729313","https://openalex.org/W2115617161","https://openalex.org/W2119609467","https://openalex.org/W2134633067","https://openalex.org/W2141342364","https://openalex.org/W2141665948","https://openalex.org/W2144081294","https://openalex.org/W2150196852","https://openalex.org/W2160468841","https://openalex.org/W2165392093","https://openalex.org/W2170088834","https://openalex.org/W3022923721","https://openalex.org/W3041276148","https://openalex.org/W4230846944","https://openalex.org/W4235184194","https://openalex.org/W4239293011","https://openalex.org/W4246788062","https://openalex.org/W4285719527","https://openalex.org/W6631615826"],"related_works":["https://openalex.org/W1539379314","https://openalex.org/W2888673113","https://openalex.org/W2107914397","https://openalex.org/W2056065966","https://openalex.org/W2062641654","https://openalex.org/W2352602608","https://openalex.org/W3149975758","https://openalex.org/W3023262859","https://openalex.org/W2212288070","https://openalex.org/W3213987435"],"abstract_inverted_index":{"Application":[0],"performance":[1,51],"on":[2],"high-performance":[3],"shared-memory":[4],"systems":[5],"is":[6,53,66],"often":[7],"limited":[8],"by":[9,41],"sharing":[10],"patterns":[11],"resulting":[12],"in":[13,58],"cache-coherence":[14],"bottlenecks.":[15],"Current":[16],"approaches":[17],"to":[18,60],"identify":[19],"coherence":[20],"bottlenecks":[21],"incur":[22],"considerable":[23],"run-time":[24],"overhead":[25],"and":[26,75,92],"do":[27],"not":[28],"scale.":[29],"We":[30],"present":[31],"two":[32,42],"novel":[33],"hardware-assisted":[34],"coherence-analysis":[35],"techniques":[36],"that":[37],"reduce":[38],"trace":[39],"sizes":[40],"orders":[43],"of":[44,69],"magnitude":[45,70],"over":[46,94],"full":[47],"traces.":[48],"First,":[49],"hardware":[50],"monitoring":[52],"combined":[54],"with":[55],"capturing":[56],"stores":[57],"software":[59],"provide":[61],"a":[62],"lossy-trace":[63],"mechanism,":[64],"which":[65,87],"an":[67],"order":[68],"faster":[71],"than":[72],"software-instrumentation-based":[73],"full-tracing":[74],"retains":[76],"accuracy.":[77],"Second,":[78],"selected":[79],"long-latency":[80],"loads":[81],"are":[82],"instrumented":[83],"via":[84],"binary":[85],"rewriting,":[86],"provides":[88],"even":[89],"higher":[90],"accuracy":[91],"control":[93],"tracing,":[95],"but":[96],"requires":[97],"additional":[98],"overhead.":[99]},"counts_by_year":[{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":3}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
