{"id":"https://openalex.org/W4416401411","doi":"https://doi.org/10.1145/3774904.3792799","title":"TaoSR-AGRL: Adaptive Guided Reinforcement Learning Framework for E-commerce Search Relevance","display_name":"TaoSR-AGRL: Adaptive Guided Reinforcement Learning Framework for E-commerce Search Relevance","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W4416401411","doi":"https://doi.org/10.1145/3774904.3792799"},"language":"en","primary_location":{"id":"doi:10.1145/3774904.3792799","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3774904.3792799","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2026","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.08048","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jianhui Yang","orcid":"https://orcid.org/0009-0004-3547-0472"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianhui Yang","raw_affiliation_strings":["Tsinghua University, Beijing, China and Taobao &amp;#38; Tmall Group of Alibaba, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-3547-0472","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China and Taobao &amp;#38; Tmall Group of Alibaba, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yiming Jin","orcid":"https://orcid.org/0009-0002-1786-0894"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiming Jin","raw_affiliation_strings":["Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0002-1786-0894","affiliations":[{"raw_affiliation_string":"Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119918985","display_name":"Pengkun Jiao","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengkun Jiao","raw_affiliation_strings":["Fudan University, Shanghai, China and Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0007-0542-3482","affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China and Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Chenhe Dong","orcid":"https://orcid.org/0000-0002-2211-5138"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenhe Dong","raw_affiliation_strings":["Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-2211-5138","affiliations":[{"raw_affiliation_string":"Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zerui Huang","orcid":"https://orcid.org/0009-0000-6354-9242"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zerui Huang","raw_affiliation_strings":["Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0000-6354-9242","affiliations":[{"raw_affiliation_string":"Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027178632","display_name":"Shaowei Yao","orcid":"https://orcid.org/0009-0002-3216-7414"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaowei Yao","raw_affiliation_strings":["Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0002-3216-7414","affiliations":[{"raw_affiliation_string":"Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103192305","display_name":"Xuejun Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojiang Zhou","raw_affiliation_strings":["Taobao &amp;#38; Tmall Group of Alibaba, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-1927-6167","affiliations":[{"raw_affiliation_string":"Taobao &amp;#38; Tmall Group of Alibaba, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047537220","display_name":"Dan Ou","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dan Ou","raw_affiliation_strings":["Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0009-9838-5343","affiliations":[{"raw_affiliation_string":"Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101669398","display_name":"Haihong Tang","orcid":"https://orcid.org/0000-0002-7103-975X"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haihong Tang","raw_affiliation_strings":["Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-7103-975X","affiliations":[{"raw_affiliation_string":"Taobao &amp;#38; Tmall Group of Alibaba, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I45928872"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00256231,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7955","last_page":"7966"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.5245000123977661,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.5245000123977661,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.16949999332427979,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.03240000084042549,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.8583999872207642},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7502999901771545},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.508899986743927},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.4796000123023987},{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.3926999866962433},{"id":"https://openalex.org/keywords/preference-learning","display_name":"Preference learning","score":0.34689998626708984}],"concepts":[{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.8583999872207642},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7670999765396118},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7502999901771545},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5958999991416931},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5523999929428101},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.508899986743927},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.4796000123023987},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.3926999866962433},{"id":"https://openalex.org/C181204326","wikidata":"https://www.wikidata.org/wiki/Q7239820","display_name":"Preference learning","level":3,"score":0.34689998626708984},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.3264999985694885},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2962999939918518},{"id":"https://openalex.org/C125014702","wikidata":"https://www.wikidata.org/wiki/Q4680749","display_name":"Adaptive learning","level":2,"score":0.2696000039577484}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3774904.3792799","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3774904.3792799","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2026","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2510.08048","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.08048","pdf_url":"https://arxiv.org/pdf/2510.08048","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2510.08048","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.08048","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.08048","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.08048","pdf_url":"https://arxiv.org/pdf/2510.08048","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416401411.pdf","grobid_xml":"https://content.openalex.org/works/W4416401411.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Query-product":[0],"relevance":[1,129,148,157,216],"prediction":[2,130],"is":[3],"fundamental":[4],"to":[5,73,86,175],"e-commerce":[6],"search":[7,235],"and":[8,23,31,64,82,111,159,170,195,208,220],"has":[9,228],"become":[10],"even":[11],"more":[12],"critical":[13],"in":[14,131,212,232],"the":[15,28,58,68,146,177,233],"era":[16],"of":[17,61,70,241,243],"AI-powered":[18],"shopping,":[19],"where":[20],"semantic":[21],"understanding":[22],"complex":[24],"reasoning":[25,78,110,183],"directly":[26],"shape":[27],"user":[29,65],"experience":[30],"business":[32,62],"conversion.":[33],"Large":[34],"Language":[35],"Models":[36],"(LLMs)":[37],"enable":[38],"generative,":[39],"reasoning-based":[40],"approaches,":[41],"typically":[42],"aligned":[43,154],"via":[44,89],"supervised":[45],"fine-tuning":[46],"(SFT)":[47],"or":[48],"preference":[49],"optimization":[50],"methods":[51,72],"like":[52,93],"Direct":[53],"Preference":[54],"Optimization":[55,97],"(DPO).":[56],"However,":[57],"increasing":[59],"complexity":[60],"rules":[63],"queries":[66],"exposes":[67],"inability":[69],"existing":[71],"endow":[74],"models":[75],"with":[76,155,226],"robust":[77],"capacity":[79],"for":[80,108,127],"long-tail":[81],"challenging":[83],"cases.":[84],"Efforts":[85],"address":[87,115],"this":[88],"reinforcement":[90],"learning":[91],"strategies":[92],"Group":[94],"Relative":[95],"Policy":[96],"(GRPO)":[98],"often":[99],"suffer":[100],"from":[101,180],"sparse":[102],"terminal":[103],"rewards,":[104],"offering":[105],"insufficient":[106],"guidance":[107,174],"multi-step":[109],"slowing":[112],"convergence.":[113],"To":[114],"these":[116],"challenges,":[117],"we":[118],"propose":[119],"TaoSR-AGRL,":[120],"an":[121],"Adaptive":[122,161],"Guided":[123,162],"Reinforcement":[124],"Learning":[125],"framework":[126],"LLM-based":[128],"Taobao":[132,202],"Search":[133],"Relevance.":[134],"TaoSR-AGRL":[135,188,227],"introduces":[136],"two":[137],"key":[138],"innovations:":[139],"(1)":[140],"Rule-aware":[141],"Reward":[142],"Shaping,":[143],"which":[144,164],"decomposes":[145],"final":[147],"judgment":[149],"into":[150],"dense,":[151],"structured":[152],"rewards":[153],"domain-specific":[156],"criteria;":[158],"(2)":[160],"Replay,":[163],"identifies":[165],"low-accuracy":[166],"rollouts":[167],"during":[168],"training":[169,221],"injects":[171],"targeted":[172],"ground-truth":[173],"steer":[176],"policy":[178],"away":[179],"stagnant,":[181],"rule-violating":[182],"patterns":[184],"toward":[185],"compliant":[186],"trajectories.":[187],"was":[189],"evaluated":[190],"on":[191,201,237],"large-scale":[192],"real-world":[193],"datasets":[194],"through":[196],"online":[197],"side-by-side":[198],"human":[199],"evaluations":[200],"Search.":[203],"It":[204],"consistently":[205],"outperforms":[206],"DPO":[207],"standard":[209],"GRPO":[210],"baselines":[211],"offline":[213],"experiments,":[214],"improving":[215],"accuracy,":[217],"rule":[218],"adherence,":[219],"stability.":[222],"The":[223],"model":[224],"trained":[225],"been":[229],"successfully":[230],"deployed":[231],"main":[234],"scenario":[236],"Taobao,":[238],"serving":[239],"hundreds":[240],"millions":[242],"users.":[244]},"counts_by_year":[],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2025-10-11T00:00:00"}
