{"id":"https://openalex.org/W2135234700","doi":"https://doi.org/10.1145/1216544.1216550","title":"Single-dimension software pipelining for multidimensional loops","display_name":"Single-dimension software pipelining for multidimensional loops","publication_year":2007,"publication_date":"2007-03-01","ids":{"openalex":"https://openalex.org/W2135234700","doi":"https://doi.org/10.1145/1216544.1216550","mag":"2135234700"},"language":"en","primary_location":{"id":"doi:10.1145/1216544.1216550","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1216544.1216550","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1216544.1216550","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/1216544.1216550","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023962594","display_name":"Hongbo Rong","orcid":"https://orcid.org/0000-0002-3275-7791"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hongbo Rong","raw_affiliation_strings":["Microsoft Corporation, Redmond, Washington"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, Washington","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102349684","display_name":"Zhizhong Tang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhizhong Tang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113510967","display_name":"R. Govindarajan","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"R. Govindarajan","raw_affiliation_strings":["Indian Institute of Science, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Science, Bangalore, India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021284960","display_name":"Alban Douillet","orcid":null},"institutions":[{"id":"https://openalex.org/I1324840837","display_name":"Hewlett-Packard (United States)","ror":"https://ror.org/059rn9488","country_code":"US","type":"company","lineage":["https://openalex.org/I1324840837"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alban Douillet","raw_affiliation_strings":["Hewlett-Packard Company, Palo Alto, California"],"affiliations":[{"raw_affiliation_string":"Hewlett-Packard Company, Palo Alto, California","institution_ids":["https://openalex.org/I1324840837"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046024163","display_name":"Guang R. Gao","orcid":"https://orcid.org/0000-0002-5265-7528"},"institutions":[{"id":"https://openalex.org/I86501945","display_name":"University of Delaware","ror":"https://ror.org/01sbq1a82","country_code":"US","type":"education","lineage":["https://openalex.org/I86501945"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guang R. Gao","raw_affiliation_strings":["University of Delaware, Newark, Delaware","University of Delaware, Newark, Delaware*"],"affiliations":[{"raw_affiliation_string":"University of Delaware, Newark, Delaware","institution_ids":["https://openalex.org/I86501945"]},{"raw_affiliation_string":"University of Delaware, Newark, Delaware*","institution_ids":["https://openalex.org/I86501945"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5023962594"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":6.9665,"has_fulltext":true,"cited_by_count":42,"citation_normalized_percentile":{"value":0.96967889,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"4","issue":"1","first_page":"7","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-pipelining","display_name":"Software pipelining","score":0.9396265745162964},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8287252187728882},{"id":"https://openalex.org/keywords/loop-tiling","display_name":"Loop tiling","score":0.7382457852363586},{"id":"https://openalex.org/keywords/loop-fission","display_name":"Loop fission","score":0.720440685749054},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6864184737205505},{"id":"https://openalex.org/keywords/nested-loop-join","display_name":"Nested loop join","score":0.6392459869384766},{"id":"https://openalex.org/keywords/modulo","display_name":"Modulo","score":0.6190496683120728},{"id":"https://openalex.org/keywords/loop-fusion","display_name":"Loop fusion","score":0.6046770811080933},{"id":"https://openalex.org/keywords/loop-optimization","display_name":"Loop optimization","score":0.47067198157310486},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4670637547969818},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4580037593841553},{"id":"https://openalex.org/keywords/high-level-synthesis","display_name":"High-level synthesis","score":0.45089390873908997},{"id":"https://openalex.org/keywords/polytope-model","display_name":"Polytope model","score":0.43221551179885864},{"id":"https://openalex.org/keywords/inner-loop","display_name":"Inner loop","score":0.4155384302139282},{"id":"https://openalex.org/keywords/loop","display_name":"Loop (graph theory)","score":0.41282233595848083},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.37657657265663147},{"id":"https://openalex.org/keywords/optimizing-compiler","display_name":"Optimizing compiler","score":0.26828959584236145},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.26488226652145386},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.18158555030822754},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.1566966474056244},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.14345991611480713},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14170747995376587},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.11192676424980164}],"concepts":[{"id":"https://openalex.org/C188854837","wikidata":"https://www.wikidata.org/wiki/Q268469","display_name":"Software pipelining","level":3,"score":0.9396265745162964},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8287252187728882},{"id":"https://openalex.org/C11799548","wikidata":"https://www.wikidata.org/wiki/Q6675847","display_name":"Loop tiling","level":3,"score":0.7382457852363586},{"id":"https://openalex.org/C134718785","wikidata":"https://www.wikidata.org/wiki/Q6675821","display_name":"Loop fission","level":3,"score":0.720440685749054},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6864184737205505},{"id":"https://openalex.org/C1306188","wikidata":"https://www.wikidata.org/wiki/Q4060687","display_name":"Nested loop join","level":2,"score":0.6392459869384766},{"id":"https://openalex.org/C54732982","wikidata":"https://www.wikidata.org/wiki/Q1415345","display_name":"Modulo","level":2,"score":0.6190496683120728},{"id":"https://openalex.org/C82653869","wikidata":"https://www.wikidata.org/wiki/Q6675821","display_name":"Loop fusion","level":3,"score":0.6046770811080933},{"id":"https://openalex.org/C29331672","wikidata":"https://www.wikidata.org/wiki/Q3354468","display_name":"Loop optimization","level":4,"score":0.47067198157310486},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4670637547969818},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4580037593841553},{"id":"https://openalex.org/C58013763","wikidata":"https://www.wikidata.org/wiki/Q5754574","display_name":"High-level synthesis","level":3,"score":0.45089390873908997},{"id":"https://openalex.org/C113391598","wikidata":"https://www.wikidata.org/wiki/Q1681391","display_name":"Polytope model","level":3,"score":0.43221551179885864},{"id":"https://openalex.org/C58716799","wikidata":"https://www.wikidata.org/wiki/Q6035648","display_name":"Inner loop","level":3,"score":0.4155384302139282},{"id":"https://openalex.org/C184670325","wikidata":"https://www.wikidata.org/wiki/Q512604","display_name":"Loop (graph theory)","level":2,"score":0.41282233595848083},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.37657657265663147},{"id":"https://openalex.org/C190902152","wikidata":"https://www.wikidata.org/wiki/Q1325106","display_name":"Optimizing compiler","level":3,"score":0.26828959584236145},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.26488226652145386},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.18158555030822754},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.1566966474056244},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.14345991611480713},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14170747995376587},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.11192676424980164},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.0},{"id":"https://openalex.org/C145691206","wikidata":"https://www.wikidata.org/wiki/Q747980","display_name":"Polytope","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1216544.1216550","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1216544.1216550","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1216544.1216550","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/1216544.1216550","is_oa":true,"landing_page_url":"https://doi.org/10.1145/1216544.1216550","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/1216544.1216550","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2135234700.pdf","grobid_xml":"https://content.openalex.org/works/W2135234700.grobid-xml"},"referenced_works_count":38,"referenced_works":["https://openalex.org/W90084074","https://openalex.org/W1494930385","https://openalex.org/W1504751168","https://openalex.org/W1511601046","https://openalex.org/W1514857085","https://openalex.org/W1527396157","https://openalex.org/W1564397886","https://openalex.org/W1837593827","https://openalex.org/W1908397450","https://openalex.org/W1963718362","https://openalex.org/W1976339994","https://openalex.org/W2018235128","https://openalex.org/W2034393996","https://openalex.org/W2038495591","https://openalex.org/W2042766074","https://openalex.org/W2053138232","https://openalex.org/W2077960035","https://openalex.org/W2083264194","https://openalex.org/W2100097836","https://openalex.org/W2113679613","https://openalex.org/W2115090673","https://openalex.org/W2119609467","https://openalex.org/W2122418911","https://openalex.org/W2123412205","https://openalex.org/W2132554738","https://openalex.org/W2138007781","https://openalex.org/W2140311411","https://openalex.org/W2156172744","https://openalex.org/W2157758640","https://openalex.org/W2158737060","https://openalex.org/W2164890169","https://openalex.org/W2215244614","https://openalex.org/W2244841219","https://openalex.org/W2296760900","https://openalex.org/W2592270044","https://openalex.org/W4232919122","https://openalex.org/W4240874773","https://openalex.org/W4241752870"],"related_works":["https://openalex.org/W217231416","https://openalex.org/W2014071052","https://openalex.org/W2586845463","https://openalex.org/W2080027614","https://openalex.org/W2103891735","https://openalex.org/W2017719803","https://openalex.org/W2052992886","https://openalex.org/W2048382960","https://openalex.org/W1490356190","https://openalex.org/W2135234700"],"abstract_inverted_index":{"Traditionally,":[0],"software":[1,31,36,70,250],"pipelining":[2,32,71,251,263],"is":[3,139,148,264],"applied":[4],"either":[5],"to":[6,20,35,152,160,188,207,247],"the":[7,17,64,89,95,111,114,124,128,132,198,224,228],"innermost":[8,18],"loop":[9,13,19,39,44,67,97,191,233,255,269],"of":[10,74,94,214],"a":[11,26,38,48,100,105,118,144,220],"given":[12],"nest":[14,40],"or":[15,80],"from":[16,235],"outer":[21],"loops.":[22],"This":[23],"paper":[24],"proposes":[25],"three-step":[27],"approach,":[28],"called":[29],"single-dimension":[30],"(SSP)":[33],",":[34],"pipeline":[37],"at":[41,172,197,252],"an":[42,253],"arbitrary":[43],"level":[45,68,98,256],"that":[46,122,166],"has":[47],"rectangular":[49],"iteration":[50],"space":[51],"and":[52,103,171,212,237],"contains":[53],"no":[54],"sibling":[55],"inner":[56],"loops":[57],"in":[58,72,131,156,223,258],"it.":[59],"The":[60,85,135],"first":[61],"step":[62,87,116],"identifies":[63],"most":[65,195],"profitable":[66],"for":[69,127,227],"terms":[73],"initiation":[75,202],"rate,":[76],"data":[77],"reuse":[78],"potential,":[79],"any":[81],"other":[82],"optimization":[83],"criteria.":[84],"second":[86],"simplifies":[88],"multidimensional":[90,133],"data-dependence":[91],"graph":[92],"(DDG)":[93],"selected":[96],"into":[99],"one-dimensional":[101,106,112],"DDG":[102],"constructs":[104],"(1D)":[107],"schedule.":[108],"Based":[109],"on":[110],"schedule,":[113],"third":[115],"derives":[117],"simple":[119],"mapping":[120],"function":[121],"specifies":[123],"schedule":[125,189],"time":[126],"operation":[129],"instances":[130],"loop.":[134],"classical":[136],"modulo":[137,182,248],"scheduling":[138,183],"subsumed":[140],"by":[141,180,219],"SSP":[142,147,167,187],"as":[143,174,176],"special":[145],"case.":[146],"also":[149],"closely":[150],"related":[151],"hyperplane":[153],"scheduling,":[154,249],"and,":[155],"fact,":[157],"extends":[158],"it":[159],"be":[161],"resource":[162],"constrained.":[163],"We":[164,185],"prove":[165],"schedules":[168,178],"are":[169,194,204,217],"correct":[170],"least":[173],"efficient":[175],"those":[177],"generated":[179],"traditional":[181],"methods.":[184],"extend":[186],"imperfect":[190],"nests,":[192],"which":[193],"common":[196],"instruction":[199],"level.":[200],"Multiple":[201],"intervals":[203],"naturally":[205],"allowed":[206],"improve":[208],"execution":[209],"efficiency.":[210],"Feasibility":[211],"correctness":[213],"our":[215],"approach":[216],"verified":[218],"prototype":[221],"implementation":[222],"ORC":[225],"compiler":[226],"IA-64":[229],"architecture,":[230],"tested":[231],"with":[232,267],"nests":[234],"Livermore":[236],"SPEC2000":[238],"floating-point":[239],"benchmarks.":[240],"Preliminary":[241],"experimental":[242],"results":[243,257],"reveal":[244],"that,":[245],"compared":[246],"appropriate":[254],"significant":[259],"performance":[260],"improvement.":[261],"Software":[262],"beneficial":[265],"even":[266],"prior":[268],"transformations.":[270]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
