{"id":"https://openalex.org/W7127118820","doi":"https://doi.org/10.3390/data11020029","title":"Controlled Generation of Synthetic Spanish Texts: A Dataset Using LLMs with and Without Contextual Retrieval","display_name":"Controlled Generation of Synthetic Spanish Texts: A Dataset Using LLMs with and Without Contextual Retrieval","publication_year":2026,"publication_date":"2026-02-01","ids":{"openalex":"https://openalex.org/W7127118820","doi":"https://doi.org/10.3390/data11020029"},"language":"en","primary_location":{"id":"doi:10.3390/data11020029","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data11020029","pdf_url":"https://www.mdpi.com/2306-5729/11/2/29/pdf","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2306-5729/11/2/29/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124871399","display_name":"Jos\u00e9 M. Garc\u00eda-Campos","orcid":null},"institutions":[{"id":"https://openalex.org/I79238269","display_name":"Universidad de Sevilla","ror":"https://ror.org/03yxnpp24","country_code":"ES","type":"education","lineage":["https://openalex.org/I79238269"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Jos\u00e9 M. Garc\u00eda-Campos","raw_affiliation_strings":["Department of Telematics Engineering, University of Seville, Camino de los Descubrimientos s/n, 41092 Seville, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Telematics Engineering, University of Seville, Camino de los Descubrimientos s/n, 41092 Seville, Spain","institution_ids":["https://openalex.org/I79238269"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124867014","display_name":"Agust\u00edn W. Lara-Romero","orcid":null},"institutions":[{"id":"https://openalex.org/I79238269","display_name":"Universidad de Sevilla","ror":"https://ror.org/03yxnpp24","country_code":"ES","type":"education","lineage":["https://openalex.org/I79238269"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Agust\u00edn W. Lara-Romero","raw_affiliation_strings":["Department of Telematics Engineering, University of Seville, Camino de los Descubrimientos s/n, 41092 Seville, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Telematics Engineering, University of Seville, Camino de los Descubrimientos s/n, 41092 Seville, Spain","institution_ids":["https://openalex.org/I79238269"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124790921","display_name":"Vicente Mayor","orcid":null},"institutions":[{"id":"https://openalex.org/I79238269","display_name":"Universidad de Sevilla","ror":"https://ror.org/03yxnpp24","country_code":"ES","type":"education","lineage":["https://openalex.org/I79238269"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Vicente Mayor","raw_affiliation_strings":["Department of Telematics Engineering, University of Seville, Camino de los Descubrimientos s/n, 41092 Seville, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Telematics Engineering, University of Seville, Camino de los Descubrimientos s/n, 41092 Seville, Spain","institution_ids":["https://openalex.org/I79238269"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124798225","display_name":"Jorge Calvillo-Arbizu","orcid":null},"institutions":[{"id":"https://openalex.org/I79238269","display_name":"Universidad de Sevilla","ror":"https://ror.org/03yxnpp24","country_code":"ES","type":"education","lineage":["https://openalex.org/I79238269"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jorge Calvillo-Arbizu","raw_affiliation_strings":["Biomedical Engineering Group, University of Seville, Camino de los Descubrimientos s/n, 41092 Seville, Spain","Department of Telematics Engineering, University of Seville, Camino de los Descubrimientos s/n, 41092 Seville, Spain"],"affiliations":[{"raw_affiliation_string":"Biomedical Engineering Group, University of Seville, Camino de los Descubrimientos s/n, 41092 Seville, Spain","institution_ids":["https://openalex.org/I79238269"]},{"raw_affiliation_string":"Department of Telematics Engineering, University of Seville, Camino de los Descubrimientos s/n, 41092 Seville, Spain","institution_ids":["https://openalex.org/I79238269"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5124871399"],"corresponding_institution_ids":["https://openalex.org/I79238269"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3908371,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"11","issue":"2","first_page":"29","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.2540999948978424,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.2540999948978424,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.12639999389648438,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.0989999994635582,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7337999939918518},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4124999940395355},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.3792000114917755},{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural language generation","score":0.3790000081062317},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.37540000677108765},{"id":"https://openalex.org/keywords/scarcity","display_name":"Scarcity","score":0.34279999136924744},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.3276999890804291}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7527999877929688},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7337999939918518},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5008000135421753},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4609000086784363},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4487000107765198},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4124999940395355},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.3792000114917755},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.3790000081062317},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.37540000677108765},{"id":"https://openalex.org/C109747225","wikidata":"https://www.wikidata.org/wiki/Q815758","display_name":"Scarcity","level":2,"score":0.34279999136924744},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.3276999890804291},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.31189998984336853},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3111000061035156},{"id":"https://openalex.org/C71611378","wikidata":"https://www.wikidata.org/wiki/Q5165191","display_name":"Contextual design","level":3,"score":0.2973000109195709},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2957000136375427},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C2983685735","wikidata":"https://www.wikidata.org/wiki/Q5227355","display_name":"Data source","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C52085439","wikidata":"https://www.wikidata.org/wiki/Q5165173","display_name":"Context analysis","level":3,"score":0.2702000141143799},{"id":"https://openalex.org/C2985684807","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Text generation","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.2540000081062317}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/data11020029","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data11020029","pdf_url":"https://www.mdpi.com/2306-5729/11/2/29/pdf","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:092d581fd9f44dcd8b5cf04cf1a879d2","is_oa":true,"landing_page_url":"https://doaj.org/article/092d581fd9f44dcd8b5cf04cf1a879d2","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data, Vol 11, Iss 2, p 29 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/data11020029","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data11020029","pdf_url":"https://www.mdpi.com/2306-5729/11/2/29/pdf","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8519824147224426,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7127118820.pdf","grobid_xml":"https://content.openalex.org/works/W7127118820.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W1967390364","https://openalex.org/W2087388117","https://openalex.org/W2156279557","https://openalex.org/W3033970320","https://openalex.org/W3155807546","https://openalex.org/W4292264221","https://openalex.org/W4309674289","https://openalex.org/W4385270229","https://openalex.org/W4391450222","https://openalex.org/W4401857375","https://openalex.org/W4403023435","https://openalex.org/W4403536522","https://openalex.org/W4404783082","https://openalex.org/W4405591004","https://openalex.org/W4406236959","https://openalex.org/W4413015513","https://openalex.org/W4414655343","https://openalex.org/W4417272388"],"related_works":[],"abstract_inverted_index":{"The":[0,63,116],"increasing":[1],"ability":[2],"of":[3,32,37,47,113,165],"Large":[4],"Language":[5],"Models":[6],"(LLMs)":[7],"to":[8,20,66,109,125],"generate":[9],"fluent":[10],"and":[11,22,86,144,152,163],"coherent":[12],"text":[13,162],"has":[14],"heightened":[15],"the":[16,30,35,68,94,100,111],"need":[17],"for":[18],"resources":[19],"analyze":[21],"detect":[23],"synthetic":[24,49,123,161],"content,":[25],"particularly":[26],"in":[27,159],"Spanish,":[28],"where":[29],"scarcity":[31],"datasets":[33],"hinders":[34],"development":[36],"reliable":[38],"detection":[39],"systems.":[40],"This":[41],"work":[42],"presents":[43],"a":[44,58,72,87,150],"Spanish-language":[45],"dataset":[46,69,117,148],"18,236":[48],"news":[50,84],"descriptions":[51],"generated":[52,98],"from":[53,82],"real":[54],"journalistic":[55],"headlines":[56],"using":[57],"fully":[59],"reproducible,":[60],"open-source":[61],"pipeline.":[62],"methodology":[64],"used":[65],"produce":[67],"includes":[70,118],"both":[71],"Retrieval":[73],"Augmented":[74],"Generation":[75],"(RAG)":[76],"approach,":[77,89],"which":[78,90],"incorporates":[79],"contextual":[80,135,139],"information":[81],"recent":[83],"descriptions,":[85],"NO-RAG":[88],"relies":[91],"solely":[92],"on":[93],"headline.":[95],"Texts":[96],"were":[97],"with":[99],"instruction-tuned":[101],"Mistral":[102],"7B":[103],"Instruct":[104],"model,":[105],"systematically":[106],"varying":[107],"temperature":[108],"explore":[110],"effect":[112],"generation":[114,129],"parameters.":[115],"detailed":[119],"metadata":[120],"linking":[121],"each":[122],"description":[124],"its":[126],"source":[127],"headline,":[128],"settings,":[130],"and,":[131],"when":[132],"applicable,":[133],"retrieved":[134],"content.":[136],"By":[137],"combining":[138],"grounding,":[140],"controlled":[141],"parameter":[142],"variation,":[143],"source-level":[145],"traceability,":[146],"this":[147],"provides":[149],"reproducible":[151],"richly":[153],"annotated":[154],"resource":[155],"that":[156],"supports":[157],"research":[158],"Spanish":[160],"evaluation":[164],"LLM-based":[166],"generation.":[167]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2026-02-03T00:00:00"}
