{"id":"https://openalex.org/W7157960178","doi":"https://doi.org/10.3390/info17050413","title":"S-Gens: Structure-Aware Synthetic Data Generation for Enhancing Reasoning-Intensive Dense Retrieval","display_name":"S-Gens: Structure-Aware Synthetic Data Generation for Enhancing Reasoning-Intensive Dense Retrieval","publication_year":2026,"publication_date":"2026-04-26","ids":{"openalex":"https://openalex.org/W7157960178","doi":"https://doi.org/10.3390/info17050413"},"language":"en","primary_location":{"id":"doi:10.3390/info17050413","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info17050413","pdf_url":"https://www.mdpi.com/2078-2489/17/5/413/pdf?version=1777199844","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/17/5/413/pdf?version=1777199844","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103669156","display_name":"Zhou Lei","orcid":null},"institutions":[{"id":"https://openalex.org/I141962983","display_name":"Shanghai University of Engineering Science","ror":"https://ror.org/0557b9y08","country_code":"CN","type":"education","lineage":["https://openalex.org/I141962983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhou Lei","raw_affiliation_strings":["School of Computer Engineering and Science, Shanghai University, Shanghai 200444, China"],"raw_orcid":"https://orcid.org/0009-0007-6824-991X","affiliations":[{"raw_affiliation_string":"School of Computer Engineering and Science, Shanghai University, Shanghai 200444, China","institution_ids":["https://openalex.org/I141962983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015262782","display_name":"Yanqi Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I141962983","display_name":"Shanghai University of Engineering Science","ror":"https://ror.org/0557b9y08","country_code":"CN","type":"education","lineage":["https://openalex.org/I141962983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanqi Xu","raw_affiliation_strings":["School of Computer Engineering and Science, Shanghai University, Shanghai 200444, China"],"raw_orcid":"https://orcid.org/0009-0003-8713-8853","affiliations":[{"raw_affiliation_string":"School of Computer Engineering and Science, Shanghai University, Shanghai 200444, China","institution_ids":["https://openalex.org/I141962983"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5134857327","display_name":"Shengbo Chen","orcid":"https://orcid.org/0009-0001-5880-7259"},"institutions":[{"id":"https://openalex.org/I141962983","display_name":"Shanghai University of Engineering Science","ror":"https://ror.org/0557b9y08","country_code":"CN","type":"education","lineage":["https://openalex.org/I141962983"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shengbo Chen","raw_affiliation_strings":["School of Computer Engineering and Science, Shanghai University, Shanghai 200444, China"],"raw_orcid":"https://orcid.org/0009-0001-5880-7259","affiliations":[{"raw_affiliation_string":"School of Computer Engineering and Science, Shanghai University, Shanghai 200444, China","institution_ids":["https://openalex.org/I141962983"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5134857327"],"corresponding_institution_ids":["https://openalex.org/I141962983"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.7681928,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"17","issue":"5","first_page":"413","last_page":"413"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.3528999984264374,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.3528999984264374,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.3328999876976013,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.14569999277591705,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6603000164031982},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.620199978351593},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5974000096321106},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.5027999877929688},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.46540001034736633},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4652000069618225},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4325999915599823},{"id":"https://openalex.org/keywords/knowledge-graph","display_name":"Knowledge graph","score":0.42080000042915344}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7645000219345093},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6603000164031982},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.620199978351593},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5974000096321106},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.553600013256073},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.5027999877929688},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.46540001034736633},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4652000069618225},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43970000743865967},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4325999915599823},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.42080000042915344},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.40549999475479126},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.40049999952316284},{"id":"https://openalex.org/C551230270","wikidata":"https://www.wikidata.org/wiki/Q4368942","display_name":"Data retrieval","level":2,"score":0.37560001015663147},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.34630000591278076},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3310000002384186},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31619998812675476},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3124000132083893},{"id":"https://openalex.org/C93361087","wikidata":"https://www.wikidata.org/wiki/Q4426698","display_name":"Data consistency","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.26750001311302185},{"id":"https://openalex.org/C134752490","wikidata":"https://www.wikidata.org/wiki/Q374182","display_name":"Logical consequence","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2623000144958496},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/info17050413","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info17050413","pdf_url":"https://www.mdpi.com/2078-2489/17/5/413/pdf?version=1777199844","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:8c8f366f4cc4441e9124ae65b15b679b","is_oa":true,"landing_page_url":"https://doaj.org/article/8c8f366f4cc4441e9124ae65b15b679b","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 17, Iss 5, p 413 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/info17050413","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info17050413","pdf_url":"https://www.mdpi.com/2078-2489/17/5/413/pdf?version=1777199844","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.4696206748485565,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G4292543467","display_name":null,"funder_award_id":"22H00324","funder_id":"https://openalex.org/F4320325553","funder_display_name":"Shanghai University"}],"funders":[{"id":"https://openalex.org/F4320325553","display_name":"Shanghai University","ror":"https://ror.org/006teas31"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7157960178.pdf","grobid_xml":"https://content.openalex.org/works/W7157960178.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Dense":[0],"retrievers":[1],"rely":[2],"heavily":[3],"on":[4,34,156,172],"high-quality":[5],"training":[6],"triplets,":[7],"yet":[8],"existing":[9],"data":[10,77,114],"construction":[11],"strategies":[12],"remain":[13],"inadequate":[14],"for":[15,62,80],"reasoning-intensive":[16,82,194],"retrieval":[17,147,192],"tasks":[18,175],"involving":[19],"multi-hop":[20,173],"reasoning,":[21],"entity":[22],"relation":[23,87],"tracing,":[24],"and":[25,38,57,97,102,142,179],"implicit":[26],"evidence":[27],"composition.":[28],"Positive":[29],"samples":[30,47],"are":[31,48],"often":[32],"based":[33],"shallow":[35],"semantic":[36],"relevance":[37],"fail":[39],"to":[40,94,145],"capture":[41],"explicit":[42],"reasoning":[43,174],"chains,":[44],"while":[45],"negative":[46],"typically":[49],"sampled":[50],"from":[51],"lexical":[52],"overlap":[53],"or":[54,148],"random":[55],"candidates":[56],"therefore":[58],"provide":[59],"limited":[60],"supervision":[61,132,187],"learning":[63],"clear":[64],"decision":[65],"boundaries.":[66],"To":[67,112],"address":[68],"these":[69],"issues,":[70],"we":[71,116],"propose":[72],"S-Gens,":[73],"a":[74,118],"structure-aware":[75,185],"synthetic":[76,186],"generation":[78],"framework":[79],"enhancing":[81],"dense":[83,191],"retrieval.":[84],"S-Gens":[85,127,162],"uses":[86],"paths":[88],"in":[89,193],"an":[90],"external":[91],"knowledge":[92],"graph":[93,120],"synthesize":[95],"queries":[96],"structurally":[98,108],"consistent":[99],"positive":[100],"samples,":[101],"further":[103],"constructs":[104],"semantically":[105],"similar":[106],"but":[107],"inconsistent":[109],"hard":[110],"negatives.":[111],"improve":[113,190],"reliability,":[115],"introduce":[117],"Siamese":[119],"neural":[121],"network-based":[122],"consistency":[123],"filtering":[124],"mechanism.":[125],"Because":[126],"operates":[128],"entirely":[129],"during":[130],"offline":[131],"construction,":[133],"it":[134],"remains":[135],"model-agnostic,":[136],"preserves":[137],"the":[138,169],"original":[139],"inference":[140],"architecture,":[141],"is":[143],"complementary":[144],"graph-guided":[146],"RAG":[149],"pipelines":[150],"that":[151,161,184],"inject":[152],"structure":[153],"online.":[154],"Experiments":[155],"five":[157],"benchmark":[158],"datasets":[159],"show":[160],"consistently":[163],"improves":[164],"multiple":[165],"trainable":[166],"retrievers,":[167],"with":[168],"largest":[170],"gains":[171],"such":[176],"as":[177],"WebQSP":[178],"HotpotQA.":[180],"These":[181],"results":[182],"indicate":[183],"can":[188],"effectively":[189],"settings.":[195]},"counts_by_year":[],"updated_date":"2026-05-29T09:21:14.243279","created_date":"2026-04-30T00:00:00"}
