{"id":"https://openalex.org/W7133190250","doi":"https://doi.org/10.48550/arxiv.2602.23620","title":"Synthetic Data Powers Product Retrieval for Long-tail Knowledge-Intensive Queries in E-commerce Search","display_name":"Synthetic Data Powers Product Retrieval for Long-tail Knowledge-Intensive Queries in E-commerce Search","publication_year":2026,"publication_date":"2026-02-27","ids":{"openalex":"https://openalex.org/W7133190250","doi":"https://doi.org/10.48550/arxiv.2602.23620"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.23620","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.23620","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.23620","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071742546","display_name":"Ling Gui","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ling, Gui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127870301","display_name":"Weiyuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Weiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127872646","display_name":"Yue Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Yue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101548852","display_name":"Wenjun Peng","orcid":"https://orcid.org/0000-0002-0578-0232"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Wenjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078651910","display_name":"Xingxian Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xingxian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127863124","display_name":"Dongshuai Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Dongshuai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127794452","display_name":"Fuyu Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Fuyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047537220","display_name":"Dan Ou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ou, Dan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101669398","display_name":"Haihong Tang","orcid":"https://orcid.org/0000-0002-7103-975X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Haihong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5071742546"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.8212000131607056,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.8212000131607056,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.026799999177455902,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.02419999986886978,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rewriting","display_name":"Rewriting","score":0.7085000276565552},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.619700014591217},{"id":"https://openalex.org/keywords/data-retrieval","display_name":"Data retrieval","score":0.5378000140190125},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5175999999046326},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.49219998717308044},{"id":"https://openalex.org/keywords/query-expansion","display_name":"Query expansion","score":0.4390999972820282},{"id":"https://openalex.org/keywords/product","display_name":"Product (mathematics)","score":0.4032999873161316},{"id":"https://openalex.org/keywords/query-language","display_name":"Query language","score":0.39559999108314514},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.39309999346733093}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8378999829292297},{"id":"https://openalex.org/C154690210","wikidata":"https://www.wikidata.org/wiki/Q1668499","display_name":"Rewriting","level":2,"score":0.7085000276565552},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6453999876976013},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.619700014591217},{"id":"https://openalex.org/C551230270","wikidata":"https://www.wikidata.org/wiki/Q4368942","display_name":"Data retrieval","level":2,"score":0.5378000140190125},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5175999999046326},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.49219998717308044},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.4390999972820282},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.4032999873161316},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.39559999108314514},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.39309999346733093},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.3815000057220459},{"id":"https://openalex.org/C2780613888","wikidata":"https://www.wikidata.org/wiki/Q6423394","display_name":"Knowledge retrieval","level":3,"score":0.3785000145435333},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3573000133037567},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.35440000891685486},{"id":"https://openalex.org/C4969071","wikidata":"https://www.wikidata.org/wiki/Q7316353","display_name":"Result set","level":3,"score":0.3449999988079071},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.3366999924182892},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.32659998536109924},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.323199987411499},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.321399986743927},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3050000071525574},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C194051981","wikidata":"https://www.wikidata.org/wiki/Q1337691","display_name":"Economic shortage","level":3,"score":0.26260000467300415}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.23620","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.23620","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.23620","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.23620","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Product":[0],"retrieval":[1,84,99,123,156,189],"is":[2,107],"the":[3,23,111,126],"backbone":[4],"of":[5,20,72,113,130],"e-commerce":[6],"search:":[7],"for":[8,25,34,63,83],"each":[9],"user":[10,29,207],"query,":[11],"it":[12],"identifies":[13],"a":[14,70,80,114,134,153,203],"high-recall":[15],"candidate":[16],"set":[17],"from":[18,69],"billions":[19],"items,":[21],"laying":[22],"foundation":[24],"high-quality":[26],"ranking":[27],"and":[28,58,143],"experience.":[30,209],"Despite":[31],"extensive":[32],"optimization":[33],"mainstream":[35],"queries,":[36,43,165],"existing":[37],"systems":[38],"still":[39],"struggle":[40],"with":[41,139],"long-tail":[42],"especially":[44],"knowledge-intensive":[45,102],"ones.":[46],"These":[47],"queries":[48,79],"exhibit":[49],"diverse":[50],"linguistic":[51],"patterns,":[52],"often":[53],"lack":[54],"explicit":[55],"purchase":[56],"intent,":[57],"require":[59],"domain-specific":[60],"knowledge":[61],"reasoning":[62],"accurate":[64],"interpretation.":[65],"They":[66],"also":[67],"suffer":[68],"shortage":[71],"reliable":[73],"behavioral":[74],"logs,":[75],"which":[76,166],"makes":[77],"such":[78],"persistent":[81],"challenge":[82],"optimization.":[85],"To":[86],"address":[87],"these":[88],"issues,":[89],"we":[90,132],"propose":[91],"an":[92,120],"efficient":[93,121],"data":[94,187],"synthesis":[95],"framework":[96],"tailored":[97],"to":[98,108,193],"involving":[100],"long-tail,":[101],"queries.":[103],"The":[104],"key":[105],"idea":[106],"implicitly":[109],"distill":[110],"capabilities":[112],"powerful":[115,154],"offline":[116,155],"query-rewriting":[117],"model":[118,138,190],"into":[119,188],"online":[122],"system.":[124],"Leveraging":[125],"strong":[127],"language":[128],"understanding":[129],"LLMs,":[131],"train":[133],"multi-candidate":[135],"query":[136],"rewriting":[137,146],"multiple":[140],"reward":[141],"signals":[142],"capture":[144],"its":[145],"capability":[147],"in":[148,163,206],"well-curated":[149],"query-product":[150],"pairs":[151],"through":[152],"pipeline.":[157],"This":[158],"design":[159],"mitigates":[160],"distributional":[161],"shift":[162],"rewritten":[164],"might":[167],"otherwise":[168],"limit":[169],"incremental":[170],"recall":[171],"or":[172],"introduce":[173],"irrelevant":[174],"products.":[175],"Experiments":[176],"demonstrate":[177],"that":[178],"without":[179],"any":[180],"additional":[181],"tricks,":[182],"simply":[183],"incorporating":[184],"this":[185],"synthetic":[186],"training":[191],"leads":[192],"significant":[194],"improvements.":[195],"Online":[196],"Side-By-Side":[197],"(SBS)":[198],"human":[199],"evaluation":[200],"results":[201],"indicate":[202],"notable":[204],"enhancement":[205],"search":[208]},"counts_by_year":[],"updated_date":"2026-03-03T06:18:10.843953","created_date":"2026-03-03T00:00:00"}
