{"id":"https://openalex.org/W4385567765","doi":"https://doi.org/10.1145/3580305.3599934","title":"Root Cause Analysis for Microservice Systems via Hierarchical Reinforcement Learning from Human Feedback","display_name":"Root Cause Analysis for Microservice Systems via Hierarchical Reinforcement Learning from Human Feedback","publication_year":2023,"publication_date":"2023-08-04","ids":{"openalex":"https://openalex.org/W4385567765","doi":"https://doi.org/10.1145/3580305.3599934"},"language":"en","primary_location":{"id":"doi:10.1145/3580305.3599934","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3580305.3599934","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083964854","display_name":"Lu Wang","orcid":"https://orcid.org/0000-0002-7305-1496"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lu Wang","raw_affiliation_strings":["Microsoft, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030520880","display_name":"Chaoyun Zhang","orcid":"https://orcid.org/0000-0002-1304-6839"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaoyun Zhang","raw_affiliation_strings":["Microsoft, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076137287","display_name":"Ruomeng Ding","orcid":"https://orcid.org/0000-0002-7800-8227"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruomeng Ding","raw_affiliation_strings":["Georgia Tech, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Tech, Atlanta, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100667410","display_name":"Yong Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Xu","raw_affiliation_strings":["Microsoft, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101472342","display_name":"Qihang Chen","orcid":"https://orcid.org/0009-0006-6955-5196"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qihang Chen","raw_affiliation_strings":["Microsoft, Suzhou, China","Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Suzhou, China","institution_ids":[]},{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101671137","display_name":"Wentao Zou","orcid":"https://orcid.org/0009-0003-0901-7466"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wentao Zou","raw_affiliation_strings":["Microsoft, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Suzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101548992","display_name":"Qingjun Chen","orcid":"https://orcid.org/0009-0001-0548-2614"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingjun Chen","raw_affiliation_strings":["Microsoft, Suzhou, China","Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Suzhou, China","institution_ids":[]},{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101558063","display_name":"Meng Zhang","orcid":"https://orcid.org/0009-0004-4714-9053"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng Zhang","raw_affiliation_strings":["Microsoft, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Suzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090914230","display_name":"X. Y. Gao","orcid":"https://orcid.org/0009-0009-5460-9122"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xuedong Gao","raw_affiliation_strings":["Microsoft, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Suzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101427717","display_name":"Hao Fan","orcid":"https://orcid.org/0009-0008-7845-1991"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Fan","raw_affiliation_strings":["Microsoft, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Suzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070722259","display_name":"Saravan Rajmohan","orcid":"https://orcid.org/0000-0002-2019-213X"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Saravan Rajmohan","raw_affiliation_strings":["Microsoft 365, Seattle, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft 365, Seattle, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088646345","display_name":"Qingwei Lin","orcid":"https://orcid.org/0000-0003-2559-2383"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingwei Lin","raw_affiliation_strings":["Microsoft, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100331488","display_name":"Dongmei Zhang","orcid":"https://orcid.org/0000-0002-9230-2799"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongmei Zhang","raw_affiliation_strings":["Microsoft, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5083964854"],"corresponding_institution_ids":["https://openalex.org/I4210113369"],"apc_list":null,"apc_paid":null,"fwci":4.593,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.95202386,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5116","last_page":"5125"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9858999848365784,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/root-cause-analysis","display_name":"Root cause analysis","score":0.795120120048523},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7482709288597107},{"id":"https://openalex.org/keywords/root-cause","display_name":"Root cause","score":0.623082160949707},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4907456636428833},{"id":"https://openalex.org/keywords/performance-indicator","display_name":"Performance indicator","score":0.4905223250389099},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4797409772872925},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4619008004665375},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.46072959899902344},{"id":"https://openalex.org/keywords/dependency-graph","display_name":"Dependency graph","score":0.45784491300582886},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.44595175981521606},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.44358959794044495},{"id":"https://openalex.org/keywords/business-process","display_name":"Business process","score":0.44315558671951294},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.28417253494262695},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.28398826718330383},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.24395468831062317},{"id":"https://openalex.org/keywords/reliability-engineering","display_name":"Reliability engineering","score":0.24247968196868896},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.16639041900634766},{"id":"https://openalex.org/keywords/work-in-process","display_name":"Work in process","score":0.159593403339386}],"concepts":[{"id":"https://openalex.org/C130963320","wikidata":"https://www.wikidata.org/wiki/Q1401207","display_name":"Root cause analysis","level":2,"score":0.795120120048523},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7482709288597107},{"id":"https://openalex.org/C84945661","wikidata":"https://www.wikidata.org/wiki/Q7366567","display_name":"Root cause","level":2,"score":0.623082160949707},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4907456636428833},{"id":"https://openalex.org/C135510737","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance indicator","level":2,"score":0.4905223250389099},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4797409772872925},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4619008004665375},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.46072959899902344},{"id":"https://openalex.org/C16311509","wikidata":"https://www.wikidata.org/wiki/Q4148050","display_name":"Dependency graph","level":3,"score":0.45784491300582886},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.44595175981521606},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.44358959794044495},{"id":"https://openalex.org/C85345410","wikidata":"https://www.wikidata.org/wiki/Q851587","display_name":"Business process","level":3,"score":0.44315558671951294},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.28417253494262695},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28398826718330383},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.24395468831062317},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.24247968196868896},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.16639041900634766},{"id":"https://openalex.org/C174998907","wikidata":"https://www.wikidata.org/wiki/Q357662","display_name":"Work in process","level":2,"score":0.159593403339386},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3580305.3599934","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3580305.3599934","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.5}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2028604378","https://openalex.org/W2123993001","https://openalex.org/W2131099349","https://openalex.org/W2767289262","https://openalex.org/W2797620412","https://openalex.org/W2821372324","https://openalex.org/W2889900103","https://openalex.org/W2900100055","https://openalex.org/W2903799441","https://openalex.org/W2911366381","https://openalex.org/W2949523744","https://openalex.org/W2962883549","https://openalex.org/W2965960151","https://openalex.org/W2971032700","https://openalex.org/W2990563145","https://openalex.org/W3006026125","https://openalex.org/W3036859210","https://openalex.org/W3080253043","https://openalex.org/W3092126302","https://openalex.org/W3105252146","https://openalex.org/W3161254931","https://openalex.org/W3163635305","https://openalex.org/W3165186175","https://openalex.org/W3167780359","https://openalex.org/W3170937175","https://openalex.org/W3179172661","https://openalex.org/W3180222915","https://openalex.org/W4247080677","https://openalex.org/W4283324222","https://openalex.org/W4290944311","https://openalex.org/W4383898437","https://openalex.org/W4393367792","https://openalex.org/W6605475740","https://openalex.org/W6631500773","https://openalex.org/W6644114143","https://openalex.org/W6763561447","https://openalex.org/W6767750290","https://openalex.org/W6851092083","https://openalex.org/W6863951927"],"related_works":["https://openalex.org/W2030594396","https://openalex.org/W2754538212","https://openalex.org/W4200610016","https://openalex.org/W3045668461","https://openalex.org/W2490884653","https://openalex.org/W4255366506","https://openalex.org/W2183996497","https://openalex.org/W2056250485","https://openalex.org/W129587375","https://openalex.org/W3161254931"],"abstract_inverted_index":{"In":[0],"microservice":[1],"systems,":[2],"the":[3,33,46,51,71,78],"identification":[4],"of":[5,8,37,77],"root":[6,47],"causes":[7],"anomalies":[9],"is":[10,20],"imperative":[11],"for":[12],"service":[13,28],"reliability":[14],"and":[15,35,43,55,74],"business":[16],"impact.":[17],"This":[18],"process":[19],"typically":[21],"divided":[22],"into":[23],"two":[24],"phases:":[25],"(i)constructing":[26],"a":[27],"dependency":[29],"graph":[30],"that":[31,40],"outlines":[32],"sequence":[34],"structure":[36],"system":[38],"components":[39,49],"are":[41,66],"invoked,":[42],"(ii)":[44],"localizing":[45],"cause":[48],"using":[50],"graph,":[52],"traces,":[53],"logs,":[54],"Key":[56],"Performance":[57],"Indicators":[58],"(KPIs)":[59],"such":[60],"as":[61],"latency.":[62],"However,":[63],"both":[64],"phases":[65],"not":[67],"straightforward":[68],"due":[69],"to":[70],"highly":[72],"dynamic":[73],"complex":[75],"nature":[76],"system,":[79],"particularly":[80],"in":[81],"large-scale":[82],"commercial":[83],"architectures":[84],"like":[85],"Microsoft":[86],"Exchange.":[87]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
