{"id":"https://openalex.org/W2510454942","doi":"https://doi.org/10.1145/2967938.2967959","title":"Speculatively Exploiting Cross-Invocation Parallelism","display_name":"Speculatively Exploiting Cross-Invocation Parallelism","publication_year":2016,"publication_date":"2016-08-31","ids":{"openalex":"https://openalex.org/W2510454942","doi":"https://doi.org/10.1145/2967938.2967959","mag":"2510454942"},"language":"en","primary_location":{"id":"doi:10.1145/2967938.2967959","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2967938.2967959","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2967959&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 International Conference on Parallel Architectures and Compilation","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=2967959&type=pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048221375","display_name":"Jialu Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jialu Huang","raw_affiliation_strings":["Google Inc., New York, NY, USA"],"affiliations":[{"raw_affiliation_string":"Google Inc., New York, NY, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022182349","display_name":"Prakash Prabhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prakash Prabhu","raw_affiliation_strings":["Google Inc., Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Google Inc., Bangalore, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081352952","display_name":"Thomas B. Jablin","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas B. Jablin","raw_affiliation_strings":["University of Illinois Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101708119","display_name":"Soumyadeep Ghosh","orcid":"https://orcid.org/0000-0003-2232-5319"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Soumyadeep Ghosh","raw_affiliation_strings":["Princeton University, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042788966","display_name":"Sotiris Apostolakis","orcid":"https://orcid.org/0000-0003-4716-9332"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sotiris Apostolakis","raw_affiliation_strings":["Princeton University, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100415731","display_name":"Jae W. Lee","orcid":"https://orcid.org/0000-0002-4266-4919"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jae W. Lee","raw_affiliation_strings":["Sungkyunkwan University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University, Seoul, South Korea","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008319090","display_name":"David I. August","orcid":"https://orcid.org/0000-0003-3327-1803"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David I. August","raw_affiliation_strings":["Princeton University, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Princeton University, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5048221375"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.08328782,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"207","last_page":"221"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8854919075965881},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.821029543876648},{"id":"https://openalex.org/keywords/speculative-multithreading","display_name":"Speculative multithreading","score":0.6530517339706421},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.593541145324707},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.5680620074272156},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.564095675945282},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4967699646949768},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4967196583747864},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.4961739480495453},{"id":"https://openalex.org/keywords/implicit-parallelism","display_name":"Implicit parallelism","score":0.4799911677837372},{"id":"https://openalex.org/keywords/speculative-execution","display_name":"Speculative execution","score":0.47907644510269165},{"id":"https://openalex.org/keywords/automatic-parallelization","display_name":"Automatic parallelization","score":0.4624303877353668},{"id":"https://openalex.org/keywords/invocation","display_name":"Invocation","score":0.44946908950805664},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4394471049308777},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.43374043703079224},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.43051135540008545},{"id":"https://openalex.org/keywords/task-parallelism","display_name":"Task parallelism","score":0.4227561950683594},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.4021451473236084},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.3682495057582855},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2747833728790283},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.13471674919128418}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8854919075965881},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.821029543876648},{"id":"https://openalex.org/C15296174","wikidata":"https://www.wikidata.org/wiki/Q7575343","display_name":"Speculative multithreading","level":4,"score":0.6530517339706421},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.593541145324707},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.5680620074272156},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.564095675945282},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4967699646949768},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4967196583747864},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.4961739480495453},{"id":"https://openalex.org/C3543717","wikidata":"https://www.wikidata.org/wiki/Q6007302","display_name":"Implicit parallelism","level":4,"score":0.4799911677837372},{"id":"https://openalex.org/C141331961","wikidata":"https://www.wikidata.org/wiki/Q2164465","display_name":"Speculative execution","level":2,"score":0.47907644510269165},{"id":"https://openalex.org/C164833996","wikidata":"https://www.wikidata.org/wiki/Q2323839","display_name":"Automatic parallelization","level":3,"score":0.4624303877353668},{"id":"https://openalex.org/C2776527387","wikidata":"https://www.wikidata.org/wiki/Q1671839","display_name":"Invocation","level":2,"score":0.44946908950805664},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4394471049308777},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.43374043703079224},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.43051135540008545},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.4227561950683594},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.4021451473236084},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.3682495057582855},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2747833728790283},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.13471674919128418},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2967938.2967959","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2967938.2967959","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2967959&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 International Conference on Parallel Architectures and Compilation","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/2967938.2967959","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2967938.2967959","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=2967959&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 International Conference on Parallel Architectures and Compilation","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G105962873","display_name":null,"funder_award_id":"OCI-1047879, CCF-1439085, CNS-0964328","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1153485226","display_name":"CSR: Medium: Collaborative Research: Scaling the Implicitly Parallel Programming Model with Lifelong Thread Extraction and Dynamic Adaptation","funder_award_id":"0964328","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2745656620","display_name":"SI2-SSI: Accelerating the Pace of Research through Implicitly Parallel Programming","funder_award_id":"1047879","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3813278024","display_name":"XPS: EXPL: CCA: A Framework for Portable Parallel Performance","funder_award_id":"1439085","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309292","display_name":"Princeton University","ror":"https://ror.org/00hx57361"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2510454942.pdf","grobid_xml":"https://content.openalex.org/works/W2510454942.grobid-xml"},"referenced_works_count":56,"referenced_works":["https://openalex.org/W79448102","https://openalex.org/W1494930385","https://openalex.org/W1496148647","https://openalex.org/W1520339130","https://openalex.org/W1536051636","https://openalex.org/W1554398046","https://openalex.org/W1588278888","https://openalex.org/W1590369105","https://openalex.org/W1865312937","https://openalex.org/W1981109753","https://openalex.org/W1986009031","https://openalex.org/W1986913310","https://openalex.org/W1996001376","https://openalex.org/W2012394090","https://openalex.org/W2034883453","https://openalex.org/W2037462607","https://openalex.org/W2039018296","https://openalex.org/W2048319366","https://openalex.org/W2069845873","https://openalex.org/W2083734282","https://openalex.org/W2090408221","https://openalex.org/W2091722458","https://openalex.org/W2092911542","https://openalex.org/W2097172837","https://openalex.org/W2104861744","https://openalex.org/W2110161565","https://openalex.org/W2110710544","https://openalex.org/W2113401164","https://openalex.org/W2113751407","https://openalex.org/W2116170858","https://openalex.org/W2118717320","https://openalex.org/W2121118021","https://openalex.org/W2124090717","https://openalex.org/W2133734540","https://openalex.org/W2134205263","https://openalex.org/W2144404616","https://openalex.org/W2145087641","https://openalex.org/W2147685859","https://openalex.org/W2148255965","https://openalex.org/W2150303942","https://openalex.org/W2151290553","https://openalex.org/W2152657019","https://openalex.org/W2155266510","https://openalex.org/W2155815531","https://openalex.org/W2158308706","https://openalex.org/W2167089275","https://openalex.org/W2167729440","https://openalex.org/W2169875292","https://openalex.org/W2171546589","https://openalex.org/W3145483441","https://openalex.org/W4240506541","https://openalex.org/W4243750660","https://openalex.org/W4249224771","https://openalex.org/W4254516222","https://openalex.org/W6632960179","https://openalex.org/W6995434384"],"related_works":["https://openalex.org/W2034886984","https://openalex.org/W2135470015","https://openalex.org/W2905048389","https://openalex.org/W4247496551","https://openalex.org/W2164579444","https://openalex.org/W1498870995","https://openalex.org/W2126220043","https://openalex.org/W1606313797","https://openalex.org/W1972872690","https://openalex.org/W1567437828"],"abstract_inverted_index":{"Automatic":[0],"parallelization":[1,86],"has":[2],"shown":[3],"promise":[4],"in":[5],"producing":[6],"scalable":[7],"multi-threaded":[8],"programs":[9,28,130],"for":[10],"multi-core":[11],"architectures.":[12],"Most":[13],"existing":[14],"automatic":[15,85],"techniques":[16,41],"parallelize":[17],"independent":[18],"loops":[19],"and":[20,104],"insert":[21],"global":[22],"synchronization":[23,34],"between":[24],"loop":[25,31,99],"invocations.":[26],"For":[27],"with":[29],"many":[30],"invocations,":[32],"frequent":[33],"often":[35],"becomes":[36],"the":[37,105],"performance":[38],"bottleneck.":[39],"Some":[40],"exploit":[42,89],"cross-invocation":[43,90,144],"parallelism":[44,91],"to":[45,57,88,114,116],"overcome":[46],"this":[47,62,78,80],"problem.":[48],"Using":[49],"static":[50],"analysis,":[51],"they":[52],"partition":[53],"iterations":[54,96],"among":[55],"threads":[56],"avoid":[58],"cross-thread":[59],"dependences.":[60],"However,":[61],"approach":[63],"may":[64],"fail":[65],"if":[66],"dependence":[67,117],"pattern":[68],"information":[69],"is":[70],"not":[71],"available":[72],"at":[73,125],"compile":[74],"time.":[75],"To":[76],"address":[77],"limitation,":[79],"work":[81],"proposes":[82],"SpecCross--the":[83],"first":[84],"technique":[87],"using":[92],"speculation.":[93],"With":[94],"speculation,":[95],"from":[97],"different":[98],"invocations":[100],"can":[101],"execute":[102],"concurrently,":[103],"program":[106],"synchronizes":[107],"only":[108,120],"on":[109,122,128],"misspeculation.":[110],"This":[111],"allows":[112],"SpecCross":[113,133],"adapt":[115],"patterns":[118],"that":[119,132],"manifest":[121],"particular":[123],"inputs":[124],"runtime.":[126],"Evaluation":[127],"eight":[129],"shows":[131],"achieves":[134],"a":[135],"geomean":[136],"speedup":[137],"of":[138],"3.43x":[139],"over":[140],"parallel":[141],"execution":[142],"without":[143],"parallelization.":[145]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
