{"id":"https://openalex.org/W4412877046","doi":"https://doi.org/10.1145/3711896.3737109","title":"Rewarding Graph Reasoning Process makes LLMs more Generalized Reasoners","display_name":"Rewarding Graph Reasoning Process makes LLMs more Generalized Reasoners","publication_year":2025,"publication_date":"2025-08-03","ids":{"openalex":"https://openalex.org/W4412877046","doi":"https://doi.org/10.1145/3711896.3737109"},"language":"en","primary_location":{"id":"doi:10.1145/3711896.3737109","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711896.3737109","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737109","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737109","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101655315","display_name":"Miao Peng","orcid":"https://orcid.org/0009-0002-7063-2014"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Miao Peng","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078624625","display_name":"Nuo Chen","orcid":"https://orcid.org/0009-0000-5880-4712"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nuo Chen","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113321958","display_name":"Zongrui Suo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zongrui Suo","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100405697","display_name":"Jia Li","orcid":"https://orcid.org/0000-0002-6362-4385"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia Li","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101655315"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.3142,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.92954057,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"2257","last_page":"2268"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13999","display_name":"Digital Rights Management and Security","score":0.8865000009536743,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13999","display_name":"Digital Rights Management and Security","score":0.8865000009536743,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.8677999973297119,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.8222000002861023,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6520886421203613},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5773733854293823},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.510560154914856},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.35298746824264526},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.20915400981903076}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6520886421203613},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5773733854293823},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.510560154914856},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35298746824264526},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.20915400981903076}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3711896.3737109","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711896.3737109","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737109","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-165785","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-165785","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"}],"best_oa_location":{"id":"doi:10.1145/3711896.3737109","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711896.3737109","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3711896.3737109","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412877046.pdf","grobid_xml":"https://content.openalex.org/works/W4412877046.grobid-xml"},"referenced_works_count":16,"referenced_works":["https://openalex.org/W2626804490","https://openalex.org/W4321392130","https://openalex.org/W4400376199","https://openalex.org/W4401306886","https://openalex.org/W4401857377","https://openalex.org/W4404781253","https://openalex.org/W4404792930","https://openalex.org/W4409671924","https://openalex.org/W6600103761","https://openalex.org/W6600137863","https://openalex.org/W6600212061","https://openalex.org/W6602368977","https://openalex.org/W6624719054","https://openalex.org/W6717243457","https://openalex.org/W6834284007","https://openalex.org/W6890025973"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Despite":[0],"significant":[1],"advancements":[2],"in":[3,12,26,34,70,146,219],"Large":[4],"Language":[5],"Models":[6,20],"(LLMs),":[7],"developing":[8],"advanced":[9],"reasoning":[10,28,45,72,81,102,122,140,172,186,190,210,221],"capabilities":[11],"LLMs":[13],"remains":[14,47],"a":[15,75,175],"key":[16,148],"challenge.":[17],"Process":[18],"Reward":[19],"(PRMs)":[21],"have":[22],"demonstrated":[23],"exceptional":[24],"promise":[25],"enhancing":[27],"by":[29],"providing":[30],"step-wise":[31,106,125],"feedback,":[32],"particularly":[33],"the":[35,52,66,97,134,205,215,226],"context":[36],"of":[37,68,208,217],"mathematical":[38,193],"reasoning.":[39],"However,":[40],"their":[41],"application":[42],"to":[43,51,119,183],"broader":[44],"domains":[46,191],"understudied,":[48],"largely":[49],"due":[50],"high":[53],"costs":[54],"associated":[55],"with":[56,104,124],"manually":[57],"creating":[58],"step-level":[59,87],"supervision.":[60],"In":[61],"this":[62,129],"work,":[63],"we":[64,131],"explore":[65],"potential":[67,216],"PRMs":[69,218],"graph":[71,92,101,139,171,185],"problems":[73,103],"-":[74],"domain":[76],"that":[77,163],"demands":[78],"sophisticated":[79],"multi-step":[80],"and":[82,113,142,152,180,188,202,231],"offers":[83],"opportunities":[84],"for":[85,100,138,178,228],"automated":[86,110],"data":[88],"generation":[89],"using":[90,109],"established":[91],"algorithms.":[93],"We":[94],"introduce":[95],"GraphSilo,":[96],"largest":[98],"dataset":[99],"fine-grained":[105],"label,":[107],"built":[108],"Task-oriented":[111],"Trajectories":[112],"Monte":[114],"Carlo":[115],"Tree":[116],"Search":[117],"(MCTS)":[118],"generate":[120],"detailed":[121],"steps":[123],"labels.":[126],"Building":[127],"upon":[128],"dataset,":[130],"train":[132],"GraphPRM,":[133],"first":[135],"PRM":[136],"designed":[137],"problems,":[141],"evaluate":[143],"its":[144],"effectiveness":[145],"two":[147],"settings:":[149],"inference-time":[150],"scaling":[151],"reinforcement":[153],"learning":[154],"via":[155],"Direct":[156],"Preference":[157],"Optimization":[158],"(DPO).":[159],"Experimental":[160],"results":[161],"show":[162],"GraphPRM":[164,196],"significantly":[165],"improves":[166],"LLM":[167,198],"performance":[168,199],"across":[169,222],"13":[170],"tasks,":[173],"delivering":[174],"9%":[176],"gain":[177],"Qwen2.5-7B":[179],"demonstrating":[181],"transferability":[182],"new":[184,189],"datasets":[187],"like":[192],"problem-solving.":[194],"Notably,":[195],"enhances":[197],"on":[200],"GSM8K":[201],"MATH500,":[203],"underscoring":[204],"cross-domain":[206],"applicability":[207],"graph-based":[209],"rewards.":[211],"Our":[212],"findings":[213],"highlight":[214],"advancing":[220],"diverse":[223],"domains,":[224],"paving":[225],"way":[227],"more":[229],"versatile":[230],"effective":[232],"LLMs.":[233]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-26T15:22:09.906841","created_date":"2025-10-10T00:00:00"}
