{"id":"https://openalex.org/W2783448383","doi":"https://doi.org/10.1109/bigdata.2017.8257932","title":"HarpLDA+: Optimizing latent dirichlet allocation for parallel efficiency","display_name":"HarpLDA+: Optimizing latent dirichlet allocation for parallel efficiency","publication_year":2017,"publication_date":"2017-12-01","ids":{"openalex":"https://openalex.org/W2783448383","doi":"https://doi.org/10.1109/bigdata.2017.8257932","mag":"2783448383"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2017.8257932","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8257932","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101652191","display_name":"Bo Peng","orcid":"https://orcid.org/0000-0002-5995-1420"},"institutions":[{"id":"https://openalex.org/I592451","display_name":"Indiana University","ror":"https://ror.org/01kg8sb98","country_code":"US","type":"education","lineage":["https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bo Peng","raw_affiliation_strings":["School of Informatics and Computing, Indiana University"],"affiliations":[{"raw_affiliation_string":"School of Informatics and Computing, Indiana University","institution_ids":["https://openalex.org/I592451"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010626339","display_name":"Bingjing Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I592451","display_name":"Indiana University","ror":"https://ror.org/01kg8sb98","country_code":"US","type":"education","lineage":["https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bingjing Zhang","raw_affiliation_strings":["School of Informatics and Computing, Indiana University"],"affiliations":[{"raw_affiliation_string":"School of Informatics and Computing, Indiana University","institution_ids":["https://openalex.org/I592451"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004864004","display_name":"Langshi Chen","orcid":"https://orcid.org/0000-0002-2789-1291"},"institutions":[{"id":"https://openalex.org/I592451","display_name":"Indiana University","ror":"https://ror.org/01kg8sb98","country_code":"US","type":"education","lineage":["https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Langshi Chen","raw_affiliation_strings":["School of Informatics and Computing, Indiana University"],"affiliations":[{"raw_affiliation_string":"School of Informatics and Computing, Indiana University","institution_ids":["https://openalex.org/I592451"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103106121","display_name":"Mihai Avram","orcid":"https://orcid.org/0000-0003-4710-2525"},"institutions":[{"id":"https://openalex.org/I592451","display_name":"Indiana University","ror":"https://ror.org/01kg8sb98","country_code":"US","type":"education","lineage":["https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mihai Avram","raw_affiliation_strings":["School of Informatics and Computing, Indiana University"],"affiliations":[{"raw_affiliation_string":"School of Informatics and Computing, Indiana University","institution_ids":["https://openalex.org/I592451"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007822092","display_name":"Robert Henschel","orcid":"https://orcid.org/0000-0003-2289-9398"},"institutions":[{"id":"https://openalex.org/I592451","display_name":"Indiana University","ror":"https://ror.org/01kg8sb98","country_code":"US","type":"education","lineage":["https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Robert Henschel","raw_affiliation_strings":["UITS, Indiana University"],"affiliations":[{"raw_affiliation_string":"UITS, Indiana University","institution_ids":["https://openalex.org/I592451"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081468884","display_name":"Craig A. Stewart","orcid":"https://orcid.org/0000-0003-2423-9019"},"institutions":[{"id":"https://openalex.org/I592451","display_name":"Indiana University","ror":"https://ror.org/01kg8sb98","country_code":"US","type":"education","lineage":["https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Craig Stewart","raw_affiliation_strings":["UITS, Indiana University"],"affiliations":[{"raw_affiliation_string":"UITS, Indiana University","institution_ids":["https://openalex.org/I592451"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089836263","display_name":"Shaojuan Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shaojuan Zhu","raw_affiliation_strings":["Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Intel Corporation","institution_ids":["https://openalex.org/I4210158342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031145531","display_name":"Emily Mccallum","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Emily Mccallum","raw_affiliation_strings":["Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Intel Corporation","institution_ids":["https://openalex.org/I4210158342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065231770","display_name":"Lisa Smith","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Lisa Smith","raw_affiliation_strings":["Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Intel Corporation","institution_ids":["https://openalex.org/I4210158342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028443060","display_name":"Zahniser Tom","orcid":null},"institutions":[{"id":"https://openalex.org/I4210158342","display_name":"Intel (United Kingdom)","ror":"https://ror.org/058cxws58","country_code":"GB","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210158342"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tom Zahniser","raw_affiliation_strings":["Intel Corporation"],"affiliations":[{"raw_affiliation_string":"Intel Corporation","institution_ids":["https://openalex.org/I4210158342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011556020","display_name":"Omer Jon","orcid":null},"institutions":[{"id":"https://openalex.org/I1343180700","display_name":"Intel (United States)","ror":"https://ror.org/01ek73717","country_code":"US","type":"company","lineage":["https://openalex.org/I1343180700"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jon Omer","raw_affiliation_strings":["Intel Corp, Santa Clara, CA, US"],"affiliations":[{"raw_affiliation_string":"Intel Corp, Santa Clara, CA, US","institution_ids":["https://openalex.org/I1343180700"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108081911","display_name":"Judy Qiu","orcid":null},"institutions":[{"id":"https://openalex.org/I592451","display_name":"Indiana University","ror":"https://ror.org/01kg8sb98","country_code":"US","type":"education","lineage":["https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Judy Qiu","raw_affiliation_strings":["School of Informatics and Computing, Indiana University"],"affiliations":[{"raw_affiliation_string":"School of Informatics and Computing, Indiana University","institution_ids":["https://openalex.org/I592451"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5101652191"],"corresponding_institution_ids":["https://openalex.org/I592451"],"apc_list":null,"apc_paid":null,"fwci":0.5851,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.77038825,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"10","issue":null,"first_page":"243","last_page":"252"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.9025740623474121},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8741468191146851},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6469389200210571},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.49206870794296265},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.47967529296875},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.44876572489738464},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4457736313343048},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.4364207684993744},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42101335525512695},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.4147539436817169},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37540096044540405},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.14442557096481323},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.0917859673500061}],"concepts":[{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.9025740623474121},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8741468191146851},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6469389200210571},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.49206870794296265},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.47967529296875},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.44876572489738464},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4457736313343048},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.4364207684993744},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42101335525512695},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.4147539436817169},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37540096044540405},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.14442557096481323},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0917859673500061},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2017.8257932","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2017.8257932","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1546359014","https://openalex.org/W1609010894","https://openalex.org/W1650067024","https://openalex.org/W1880262756","https://openalex.org/W2001082470","https://openalex.org/W2041517243","https://openalex.org/W2052261215","https://openalex.org/W2060393849","https://openalex.org/W2078670469","https://openalex.org/W2083842231","https://openalex.org/W2087937280","https://openalex.org/W2107469355","https://openalex.org/W2113547287","https://openalex.org/W2116137244","https://openalex.org/W2118034653","https://openalex.org/W2121966774","https://openalex.org/W2132737349","https://openalex.org/W2138996412","https://openalex.org/W2145702984","https://openalex.org/W2150731624","https://openalex.org/W2335835108","https://openalex.org/W2465468379","https://openalex.org/W2963491860","https://openalex.org/W2963558938","https://openalex.org/W4231510805","https://openalex.org/W6639619044","https://openalex.org/W6665801690","https://openalex.org/W6676133050","https://openalex.org/W6676756733","https://openalex.org/W6677121468","https://openalex.org/W6679393576","https://openalex.org/W6681349557","https://openalex.org/W6715131761"],"related_works":["https://openalex.org/W2888805565","https://openalex.org/W4312773271","https://openalex.org/W4315588616","https://openalex.org/W2769501189","https://openalex.org/W2962686197","https://openalex.org/W2207653751","https://openalex.org/W4293863151","https://openalex.org/W3159709618","https://openalex.org/W2611137333","https://openalex.org/W3005513013"],"abstract_inverted_index":{"Latent":[0],"Dirichlet":[1],"Allocation":[2],"(LDA)":[3],"is":[4,31,111],"a":[5,32,47,88],"widely":[6],"used":[7],"machine":[8],"learning":[9],"technique":[10],"in":[11,98,106,113],"topic":[12],"modeling":[13],"and":[14,26,30,39,61,80,101,116,119],"data":[15],"analysis.":[16],"Training":[17],"large":[18],"LDA":[19,54,124],"models":[20],"on":[21,59],"big":[22],"datasets":[23],"involves":[24],"dynamic":[25],"irregular":[27],"computation":[28],"patterns":[29],"major":[33],"challenge":[34],"to":[35,69,95],"both":[36,99],"algorithm":[37],"optimization":[38],"system":[40,56],"design.":[41],"In":[42],"this":[43],"paper,":[44],"we":[45],"present":[46],"comprehensive":[48],"benchmarking":[49],"of":[50],"our":[51],"novel":[52],"synchronized":[53],"training":[55,125],"HarpLDA+":[57,82,110],"based":[58,73],"Hadoop":[60],"Java.":[62],"It":[63],"demonstrates":[64],"impressive":[65],"performance":[66],"when":[67],"compared":[68],"three":[70,123],"other":[71,122],"MPI/C++":[72],"state-of-the-art":[74],"systems,":[75],"which":[76],"are":[77],"LightLDA,":[78],"F+NomadLDA,":[79],"WarpLDA.":[81],"uses":[83],"optimized":[84],"collective":[85],"communication":[86,117],"with":[87],"timer":[89],"control":[90],"for":[91],"load":[92],"balance,":[93],"leading":[94],"stable":[96],"scalability":[97],"shared-memory":[100],"distributed":[102],"systems.":[103,126],"We":[104],"demonstrate":[105],"the":[107,121],"experiments":[108],"that":[109],"effective":[112],"reducing":[114],"synchronization":[115],"overhead":[118],"outperforms":[120]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
