{"id":"https://openalex.org/W3084070644","doi":"https://doi.org/10.1145/3410463.3414649","title":"Model-Based Warp Overlapped Tiling for Image Processing Programs on GPUs","display_name":"Model-Based Warp Overlapped Tiling for Image Processing Programs on GPUs","publication_year":2020,"publication_date":"2020-09-30","ids":{"openalex":"https://openalex.org/W3084070644","doi":"https://doi.org/10.1145/3410463.3414649","mag":"3084070644"},"language":"en","primary_location":{"id":"doi:10.1145/3410463.3414649","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3410463.3414649","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1909.07190","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034240415","display_name":"Abhinav Jangda","orcid":"https://orcid.org/0000-0002-4849-6776"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Abhinav Jangda","raw_affiliation_strings":["University of Massachusetts Amherst, Amherst, MA, USA","Univ. of Massachusetts Amherst, Amherst, MA, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst, Amherst, MA, USA","institution_ids":["https://openalex.org/I24603500"]},{"raw_affiliation_string":"Univ. of Massachusetts Amherst, Amherst, MA, USA#TAB#","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044042294","display_name":"Arjun Guha","orcid":"https://orcid.org/0000-0002-7493-3271"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arjun Guha","raw_affiliation_strings":["Northeastern University, Boston, MA, USA","Northeastern University , Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]},{"raw_affiliation_string":"Northeastern University , Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5034240415"],"corresponding_institution_ids":["https://openalex.org/I24603500"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09539599,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"317","last_page":"328"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8447978496551514},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7255821824073792},{"id":"https://openalex.org/keywords/loop-tiling","display_name":"Loop tiling","score":0.7000824213027954},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.5524746179580688},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5397319793701172},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.4296422600746155},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.42769527435302734},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.4167185425758362},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.41599804162979126},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.41399461030960083},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.3860602378845215},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2973021864891052},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.2863510251045227},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.1969527006149292},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11735197901725769}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8447978496551514},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7255821824073792},{"id":"https://openalex.org/C11799548","wikidata":"https://www.wikidata.org/wiki/Q6675847","display_name":"Loop tiling","level":3,"score":0.7000824213027954},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.5524746179580688},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5397319793701172},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.4296422600746155},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.42769527435302734},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.4167185425758362},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.41599804162979126},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.41399461030960083},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.3860602378845215},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2973021864891052},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2863510251045227},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.1969527006149292},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11735197901725769},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3410463.3414649","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3410463.3414649","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1909.07190","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.07190","pdf_url":"https://arxiv.org/pdf/1909.07190","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:3084070644","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1909.07190v2","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1909.07190","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1909.07190","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1909.07190","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.07190","pdf_url":"https://arxiv.org/pdf/1909.07190","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.6200000047683716,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3084070644.pdf","grobid_xml":"https://content.openalex.org/works/W3084070644.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W1973532523","https://openalex.org/W2044247685","https://openalex.org/W2055312318","https://openalex.org/W2063249715","https://openalex.org/W2077143534","https://openalex.org/W2083056254","https://openalex.org/W2084917734","https://openalex.org/W2119609467","https://openalex.org/W2133352531","https://openalex.org/W2167334577","https://openalex.org/W2415973476","https://openalex.org/W2471164860","https://openalex.org/W2512431201","https://openalex.org/W2579915854","https://openalex.org/W2594003755","https://openalex.org/W2725215886","https://openalex.org/W2766166018","https://openalex.org/W2788111881","https://openalex.org/W2810610794","https://openalex.org/W2891890103","https://openalex.org/W2914500262","https://openalex.org/W2920893842","https://openalex.org/W2927315507","https://openalex.org/W2936463352","https://openalex.org/W2949870253","https://openalex.org/W2961619211","https://openalex.org/W2995257150","https://openalex.org/W3090425474","https://openalex.org/W4235662649","https://openalex.org/W4250027548"],"related_works":["https://openalex.org/W2265231147","https://openalex.org/W2337584457","https://openalex.org/W2046750610","https://openalex.org/W2257971564","https://openalex.org/W1997597015","https://openalex.org/W2885840893","https://openalex.org/W3113619457","https://openalex.org/W2765529683","https://openalex.org/W2067431903","https://openalex.org/W1217358336","https://openalex.org/W1987588924","https://openalex.org/W2309105422","https://openalex.org/W1964704819","https://openalex.org/W2751476053","https://openalex.org/W2913790721","https://openalex.org/W3010779417","https://openalex.org/W2954651520","https://openalex.org/W3202077128","https://openalex.org/W1983220757","https://openalex.org/W2996597839"],"abstract_inverted_index":{"Domain-specific":[0],"languages":[1],"that":[2,51,59,76],"execute":[3],"image":[4,18],"processing":[5],"pipelines":[6],"on":[7],"GPUs,":[8],"such":[9],"as":[10],"Halide":[11],"and":[12,22,67],"Forma,":[13],"operate":[14],"by":[15],"1)~dividing":[16],"the":[17],"into":[19],"overlapped":[20,54],"tiles,":[21],"2)~fusing":[23],"loops":[24],"to":[25],"improve":[26],"memory":[27,62,82],"locality.":[28],"However,":[29],"current":[30],"approaches":[31],"have":[32],"limitations:":[33],"1)~they":[34],"require":[35,52],"intra":[36],"thread":[37],"block":[38],"synchronization,":[39],"which":[40],"has":[41],"a":[42],"nontrivial":[43],"cost,":[44],"2)~they":[45],"must":[46],"choose":[47],"between":[48],"small":[49],"tiles":[50,58],"more":[53],"computations":[55],"or":[56],"large":[57],"increase":[60],"shared":[61],"access":[63],"(and":[64],"lowers":[65],"occupancy),":[66],"3)":[68],"their":[69],"autoscheduling":[70],"algorithms":[71],"use":[72],"simplified":[73],"GPU":[74],"models":[75],"can":[77],"result":[78],"in":[79],"inefficient":[80],"global":[81],"accesses.":[83]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
