{"id":"https://openalex.org/W4319660631","doi":"https://doi.org/10.1587/transfun.2022eap1084","title":"LFWS: Long-Operation First Warp Scheduling Algorithm to Effectively Hide the Latency for GPUs","display_name":"LFWS: Long-Operation First Warp Scheduling Algorithm to Effectively Hide the Latency for GPUs","publication_year":2023,"publication_date":"2023-02-09","ids":{"openalex":"https://openalex.org/W4319660631","doi":"https://doi.org/10.1587/transfun.2022eap1084"},"language":"en","primary_location":{"id":"doi:10.1587/transfun.2022eap1084","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1587/transfun.2022eap1084","pdf_url":"https://www.jstage.jst.go.jp/article/transfun/E106.A/8/E106.A_2022EAP1084/_pdf","source":{"id":"https://openalex.org/S166990724","display_name":"IEICE Transactions on Fundamentals of Electronics Communications and Computer Sciences","issn_l":"0916-8508","issn":["0916-8508","1745-1337"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.jstage.jst.go.jp/article/transfun/E106.A/8/E106.A_2022EAP1084/_pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010130572","display_name":"Song LIU","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Song LIU","raw_affiliation_strings":["School of Computer Science and Technology, Xi'an Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100676365","display_name":"Jie Ma","orcid":"https://orcid.org/0000-0003-3690-8240"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie MA","raw_affiliation_strings":["School of Computer Science and Technology, Xi'an Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027580685","display_name":"Chenyu Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenyu ZHAO","raw_affiliation_strings":["School of Computer Science and Technology, Xi'an Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027014817","display_name":"Xinhe WAN","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinhe WAN","raw_affiliation_strings":["School of Computer Science and Technology, Xi'an Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029236990","display_name":"Weiguo Wu","orcid":"https://orcid.org/0000-0002-1179-3435"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiguo WU","raw_affiliation_strings":["School of Computer Science and Technology, Xi'an Jiaotong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Xi'an Jiaotong University","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2883,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.44410646,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"E106.A","issue":"8","first_page":"1043","last_page":"1050"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8630244135856628},{"id":"https://openalex.org/keywords/queue","display_name":"Queue","score":0.7078773975372314},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6995898485183716},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6899802088737488},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6436102390289307},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5434527397155762},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3675123453140259},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3480961322784424},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.08516684174537659},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.07474857568740845}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8630244135856628},{"id":"https://openalex.org/C160403385","wikidata":"https://www.wikidata.org/wiki/Q220543","display_name":"Queue","level":2,"score":0.7078773975372314},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6995898485183716},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6899802088737488},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6436102390289307},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5434527397155762},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3675123453140259},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3480961322784424},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.08516684174537659},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.07474857568740845},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1587/transfun.2022eap1084","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1587/transfun.2022eap1084","pdf_url":"https://www.jstage.jst.go.jp/article/transfun/E106.A/8/E106.A_2022EAP1084/_pdf","source":{"id":"https://openalex.org/S166990724","display_name":"IEICE Transactions on Fundamentals of Electronics Communications and Computer Sciences","issn_l":"0916-8508","issn":["0916-8508","1745-1337"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1587/transfun.2022eap1084","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1587/transfun.2022eap1084","pdf_url":"https://www.jstage.jst.go.jp/article/transfun/E106.A/8/E106.A_2022EAP1084/_pdf","source":{"id":"https://openalex.org/S166990724","display_name":"IEICE Transactions on Fundamentals of Electronics Communications and Computer Sciences","issn_l":"0916-8508","issn":["0916-8508","1745-1337"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4319660631.pdf"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W1979527452","https://openalex.org/W2047060659","https://openalex.org/W2080592089","https://openalex.org/W2090584832","https://openalex.org/W2096661534","https://openalex.org/W2111669132","https://openalex.org/W2328929289","https://openalex.org/W2404839908","https://openalex.org/W2589307556","https://openalex.org/W2892033614","https://openalex.org/W2899489366","https://openalex.org/W2911720734","https://openalex.org/W2955569529","https://openalex.org/W2963708526","https://openalex.org/W2991043486","https://openalex.org/W3088700869","https://openalex.org/W3102510044","https://openalex.org/W3207973653","https://openalex.org/W4244089596","https://openalex.org/W4249082578"],"related_works":["https://openalex.org/W3062287","https://openalex.org/W2380390332","https://openalex.org/W3213381848","https://openalex.org/W2017587301","https://openalex.org/W2012954338","https://openalex.org/W2005148983","https://openalex.org/W2096672917","https://openalex.org/W2009169896","https://openalex.org/W2392023973","https://openalex.org/W2939411666"],"abstract_inverted_index":{"GPUs":[0,63],"have":[1],"become":[2],"the":[3,9,19,24,65,92,101,109,112,117,120,131,145,149,156,163,170,175,179,182,186,203,219],"dominant":[4],"computing":[5,28],"units":[6],"to":[7,49,70,95,106,144,159,173,201],"meet":[8],"need":[10],"of":[11,26,67,111,133,162,181,205,227],"high":[12],"performance":[13,32,204,225],"in":[14,31,91,103,108,119,135,148],"various":[15,198],"computational":[16,243],"fields.":[17],"But":[18],"long":[20,68,124,157],"operation":[21,127],"latency":[22,51,164],"causes":[23],"underutilization":[25],"on-chip":[27],"resources,":[29],"resulting":[30],"degradation":[33],"when":[34],"running":[35],"parallel":[36],"tasks":[37],"on":[38,62,130,189,246],"GPUs.":[39],"A":[40],"good":[41],"warp":[42,59,80,147,212,238],"scheduling":[43,60,81,213,239],"strategy":[44],"is":[45],"an":[46,223],"effective":[47],"solution":[48],"hide":[50,71,160,174],"and":[52,99,125,139,168,229,235],"improve":[53],"resource":[54,244],"utilization.":[55],"However,":[56],"most":[57],"current":[58],"algorithms":[61],"ignore":[64],"ability":[66,172],"operations":[69,134,158],"latency.":[72,176],"In":[73],"this":[74],"paper,":[75],"we":[76,184],"propose":[77],"a":[78,96,190],"long-operation-first":[79],"algorithm,":[82,207],"LFWS,":[83,183],"for":[84],"GPU":[85],"platforms.":[86],"The":[87,114,215],"LFWS":[88,115,187,206,220],"filters":[89],"warps":[90,118],"ready":[93,97,121,150],"state":[94],"queue":[98,102,122],"updates":[100],"time":[104],"according":[105],"changes":[107],"status":[110],"warp.":[113],"divides":[116],"into":[123],"short":[126],"groups":[128],"based":[129],"type":[132],"their":[136],"instruction":[137],"buffers,":[138],"it":[140],"gives":[141],"higher":[142],"priority":[143],"long-operating":[146],"queue.":[151],"This":[152],"can":[153],"effectively":[154,241],"use":[155],"some":[161],"from":[165],"each":[166],"other":[167,210],"enhance":[169],"system's":[171],"To":[177],"verify":[178],"effectiveness":[180],"implement":[185],"algorithm":[188,221],"simulation":[191],"platform":[192],"GPGPU-Sim.":[193],"Experiments":[194],"are":[195],"conducted":[196],"over":[197,232],"CUDA":[199],"applications":[200],"evaluate":[202],"compared":[208],"with":[209],"five":[211],"algorithms.":[214],"results":[216],"show":[217],"that":[218],"achieves":[222],"average":[224],"improvement":[226],"8.01%":[228],"5.09%,":[230],"respectively,":[231],"three":[233],"traditional":[234],"two":[236],"novel":[237],"algorithms,":[240],"improving":[242],"utilization":[245],"GPU.":[247]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
