{"id":"https://openalex.org/W4309561838","doi":"https://doi.org/10.1145/3571856","title":"An Optimized Framework for Matrix Factorization on the New Sunway Many-core Platform","display_name":"An Optimized Framework for Matrix Factorization on the New Sunway Many-core Platform","publication_year":2022,"publication_date":"2022-11-19","ids":{"openalex":"https://openalex.org/W4309561838","doi":"https://doi.org/10.1145/3571856"},"language":"en","primary_location":{"id":"doi:10.1145/3571856","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3571856","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3571856","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3571856","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100695928","display_name":"Wenjing Ma","orcid":"https://orcid.org/0000-0002-1795-4498"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]},{"id":"https://openalex.org/I4391767820","display_name":"State Key Laboratory of Computer Science","ror":"https://ror.org/01hsx4r68","country_code":null,"type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818","https://openalex.org/I4391767820"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenjing Ma","raw_affiliation_strings":["Institute of Software, Chinese Academy of Sciences, China and State Key Laboratory of Computer Science, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Software, Chinese Academy of Sciences, China and State Key Laboratory of Computer Science, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366","https://openalex.org/I4391767820"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070636985","display_name":"Fangfang Liu","orcid":"https://orcid.org/0000-0001-7344-7493"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]},{"id":"https://openalex.org/I4391767820","display_name":"State Key Laboratory of Computer Science","ror":"https://ror.org/01hsx4r68","country_code":null,"type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818","https://openalex.org/I4391767820"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangfang Liu","raw_affiliation_strings":["Institute of Software, Chinese Academy of Sciences, China and State Key Laboratory of Computer Science, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Software, Chinese Academy of Sciences, China and State Key Laboratory of Computer Science, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366","https://openalex.org/I4391767820"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046055268","display_name":"Daokun Chen","orcid":"https://orcid.org/0000-0003-2823-7213"},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daokun Chen","raw_affiliation_strings":["Institute of Software, Chinese Academy of Sciences, China and University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Software, Chinese Academy of Sciences, China and University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019136782","display_name":"Qinglin Lu","orcid":"https://orcid.org/0000-0002-8105-147X"},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinglin Lu","raw_affiliation_strings":["Institute of Software, Chinese Academy of Sciences, China and University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Software, Chinese Academy of Sciences, China and University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5097193991","display_name":"Yi Hu","orcid":"https://orcid.org/0000-0003-4163-6817"},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Hu","raw_affiliation_strings":["Institute of Software, Chinese Academy of Sciences, China and University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Software, Chinese Academy of Sciences, China and University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029446934","display_name":"Hongsen Wang","orcid":"https://orcid.org/0000-0002-1245-2130"},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongsen Wang","raw_affiliation_strings":["Institute of Software, Chinese Academy of Sciences, China and University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Software, Chinese Academy of Sciences, China and University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021551205","display_name":"Xinhui Yuan","orcid":"https://orcid.org/0000-0002-1375-6435"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xinhui Yuan","raw_affiliation_strings":["National Research Centre of Parallel Computer Engineering and Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"National Research Centre of Parallel Computer Engineering and Technology, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100695928"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210128818","https://openalex.org/I4391767820"],"apc_list":null,"apc_paid":null,"fwci":0.2318,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.51927602,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"20","issue":"2","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.8657349348068237},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8130245208740234},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7122480273246765},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.7095319628715515},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6922875642776489},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5581870675086975},{"id":"https://openalex.org/keywords/incomplete-lu-factorization","display_name":"Incomplete LU factorization","score":0.5484132170677185},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.46383699774742126},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.45587676763534546},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.4372147023677826},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3232004642486572},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.11502009630203247}],"concepts":[{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.8657349348068237},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8130245208740234},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7122480273246765},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.7095319628715515},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6922875642776489},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5581870675086975},{"id":"https://openalex.org/C134978465","wikidata":"https://www.wikidata.org/wiki/Q1654069","display_name":"Incomplete LU factorization","level":4,"score":0.5484132170677185},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.46383699774742126},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.45587676763534546},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.4372147023677826},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3232004642486572},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.11502009630203247},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3571856","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3571856","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3571856","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3571856","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3571856","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3571856","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5099999904632568,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4309561838.pdf","grobid_xml":"https://content.openalex.org/works/W4309561838.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W177350716","https://openalex.org/W1563304459","https://openalex.org/W1656804177","https://openalex.org/W1972091731","https://openalex.org/W2006682733","https://openalex.org/W2007578977","https://openalex.org/W2032052535","https://openalex.org/W2033614580","https://openalex.org/W2049926709","https://openalex.org/W2061781302","https://openalex.org/W2087421046","https://openalex.org/W2114463594","https://openalex.org/W2124480634","https://openalex.org/W2139116943","https://openalex.org/W2162322364","https://openalex.org/W2299229614","https://openalex.org/W2528423095","https://openalex.org/W2530848289","https://openalex.org/W2563513434","https://openalex.org/W2576489192","https://openalex.org/W2621991888","https://openalex.org/W2625458075","https://openalex.org/W2754194450","https://openalex.org/W2887881931","https://openalex.org/W3009472344","https://openalex.org/W3041433688","https://openalex.org/W3208078891","https://openalex.org/W3210551216","https://openalex.org/W4229666556","https://openalex.org/W4231150350","https://openalex.org/W4251384337","https://openalex.org/W4300129074","https://openalex.org/W4365800014"],"related_works":["https://openalex.org/W2164374667","https://openalex.org/W1881162179","https://openalex.org/W2095258827","https://openalex.org/W2375608986","https://openalex.org/W2060455298","https://openalex.org/W4250081984","https://openalex.org/W648090951","https://openalex.org/W3096060745","https://openalex.org/W119752240","https://openalex.org/W1973739845"],"abstract_inverted_index":{"Matrix":[0],"factorization":[1,26,32,82,106,153,217,232,259],"functions":[2,27,107,138,172],"are":[3,28,63],"used":[4,256],"in":[5,14,67,90,158,257],"many":[6,68],"areas":[7],"and":[8,59,135,142,178,185,226,248,275],"often":[9],"play":[10],"an":[11],"important":[12],"role":[13],"the":[15,19,22,37,40,46,49,53,64,72,77,88,91,94,109,115,133,149,159,183,195,207,211,215,229,235,258,282,287],"overall":[16],"performance":[17,54,78],"of":[18,36,80,221,266,300],"applications.":[20,70],"In":[21],"LAPACK":[23,197],"library,":[24],"matrix":[25,105,231],"implemented":[29,170],"with":[30,125,139,155,165,189,200],"blocked":[31],"algorithm,":[33],"shifting":[34],"most":[35,212],"workload":[38],"to":[39,223,253,281,296],"high-performance":[41],"Level-3":[42],"BLAS":[43,137,202],"functions.":[44,233,260,303],"But":[45],"non-blocked":[47],"part,":[48],"panel":[50,81,89,95,117,152,216],"factorization,":[51],"becomes":[52],"bottleneck,":[55],"especially":[56],"for":[57,93,103,132,228],"small-":[58],"medium-size":[60],"matrices":[61],"that":[62,204],"common":[65],"cases":[66],"real":[69],"On":[71],"new":[73,101,110],"Sunway":[74,111],"many-core":[75,112],"platform,":[76,113],"bottleneck":[79],"can":[83,143,162,293],"be":[84,163,294],"alleviated":[85],"by":[86],"keeping":[87],"LDM":[92,160],"factorization.":[96,118],"Therefore,":[97],"we":[98,249,262],"propose":[99],"a":[100,122,190,243],"framework":[102,120,184],"implementing":[104],"on":[108,182,206,242,268,272,277,286],"facilitating":[114],"in-LDM":[116],"The":[119],"provides":[121],"template":[123,289],"class":[124],"wrapper":[126],"functions,":[127],"which":[128,193],"integrates":[129],"inter-CPE":[130],"communication":[131],"Level-1":[134],"Level-2":[136],"flexible":[140],"interfaces":[141],"accommodate":[144],"different":[145],"partitioning":[146],"schemes.":[147],"With":[148],"framework,":[150,247],"writing":[151],"code":[154],"data":[156],"residing":[157],"space":[161],"done":[164],"much":[166],"higher":[167],"productivity.":[168],"We":[169],"three":[171,230],"(":[173],"dgetrf":[174,269],",":[175,177,270,274,279],"dgeqrf":[176,273],"dpotrf":[179,278],")":[180],"based":[181,241],"compared":[186,280],"our":[187,238,291],"work":[188,292],"CPE_BLAS":[191,283],"version,":[192],"uses":[194],"original":[196],"implementation":[198,239],"linked":[199],"optimized":[201],"library":[203],"runs":[205],"CPE":[208],"mesh.":[209],"Using":[210],"favorable":[213],"partitioning,":[214],"part":[218],"achieves":[219],"speedup":[220,265],"up":[222],"26.3,":[224],"19.1,":[225],"18.2":[227],"For":[234],"whole":[236],"function,":[237],"is":[240],"carefully":[244],"tuned":[245],"recursion":[246],"added":[250],"specific":[251],"optimization":[252],"some":[254],"subroutines":[255],"Overall,":[261],"obtained":[263],"average":[264],"9.76":[267],"10.12":[271],"4.16":[276],"version.":[284],"Based":[285],"current":[288],"class,":[290],"extended":[295],"support":[297],"more":[298],"categories":[299],"linear":[301],"algebra":[302]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
