{"id":"https://openalex.org/W4412517279","doi":"https://doi.org/10.3390/make7030068","title":"Towards Robust Synthetic Data Generation for Simplification of Text in French","display_name":"Towards Robust Synthetic Data Generation for Simplification of Text in French","publication_year":2025,"publication_date":"2025-07-19","ids":{"openalex":"https://openalex.org/W4412517279","doi":"https://doi.org/10.3390/make7030068"},"language":"en","primary_location":{"id":"doi:10.3390/make7030068","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7030068","pdf_url":"https://www.mdpi.com/2504-4990/7/3/68/pdf?version=1753089803","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-4990/7/3/68/pdf?version=1753089803","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036162843","display_name":"Nikos Tsourakis","orcid":"https://orcid.org/0000-0003-4195-599X"},"institutions":[{"id":"https://openalex.org/I114457229","display_name":"University of Geneva","ror":"https://ror.org/01swzsf04","country_code":"CH","type":"education","lineage":["https://openalex.org/I114457229"]},{"id":"https://openalex.org/I4210088449","display_name":"HES-SO Gen\u00e8ve","ror":"https://ror.org/007gfwn20","country_code":"CH","type":"education","lineage":["https://openalex.org/I173439891","https://openalex.org/I4210088449"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Nikos Tsourakis","raw_affiliation_strings":["Department of Translation Technology, TIM/FTI, University of Geneva, Bd du Pont-d\u2019Arve 40, 1205 Gen\u00e8ve, Switzerland"],"raw_orcid":"https://orcid.org/0000-0003-4195-599X","affiliations":[{"raw_affiliation_string":"Department of Translation Technology, TIM/FTI, University of Geneva, Bd du Pont-d\u2019Arve 40, 1205 Gen\u00e8ve, Switzerland","institution_ids":["https://openalex.org/I114457229","https://openalex.org/I4210088449"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5036162843"],"corresponding_institution_ids":["https://openalex.org/I114457229","https://openalex.org/I4210088449"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":1.7312,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.87387977,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"7","issue":"3","first_page":"68","last_page":"68"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12151","display_name":"Interpreting and Communication in Healthcare","score":0.9218999743461609,"subfield":{"id":"https://openalex.org/subfields/3600","display_name":"General Health Professions"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.5158950090408325},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.46522992849349976},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.367607444524765},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.353732705116272},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31476593017578125}],"concepts":[{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.5158950090408325},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46522992849349976},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.367607444524765},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.353732705116272},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31476593017578125}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/make7030068","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7030068","pdf_url":"https://www.mdpi.com/2504-4990/7/3/68/pdf?version=1753089803","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},{"id":"pmh:oai:unige.ch:aou:unige:186715","is_oa":true,"landing_page_url":"https://archive-ouverte.unige.ch/unige:186715","pdf_url":null,"source":{"id":"https://openalex.org/S4306402259","display_name":"Archive ouverte UNIGE (University of Geneva)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114457229","host_organization_name":"University of Geneva","host_organization_lineage":["https://openalex.org/I114457229"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction, vol. 7, no. 3 (2025) p. 22","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:doaj.org/article:d61e6843440b441895fbc3298b454600","is_oa":true,"landing_page_url":"https://doaj.org/article/d61e6843440b441895fbc3298b454600","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction, Vol 7, Iss 3, p 68 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/make7030068","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7030068","pdf_url":"https://www.mdpi.com/2504-4990/7/3/68/pdf?version=1753089803","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G152417198","display_name":null,"funder_award_id":"197864","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320320924","display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","ror":"https://ror.org/00yjd3n13"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412517279.pdf","grobid_xml":"https://content.openalex.org/works/W4412517279.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1746111881","https://openalex.org/W2045320784","https://openalex.org/W2063490579","https://openalex.org/W2101105183","https://openalex.org/W2109881807","https://openalex.org/W2534253848","https://openalex.org/W2561529111","https://openalex.org/W2564861257","https://openalex.org/W2605243085","https://openalex.org/W2963658612","https://openalex.org/W2970641574","https://openalex.org/W2997833324","https://openalex.org/W3003265726","https://openalex.org/W3011218596","https://openalex.org/W3092288641","https://openalex.org/W3160638507","https://openalex.org/W3170773997","https://openalex.org/W4226361124","https://openalex.org/W4362508231","https://openalex.org/W4379933476","https://openalex.org/W4384662964","https://openalex.org/W4385572634","https://openalex.org/W4389518954","https://openalex.org/W4390389361","https://openalex.org/W4390692489","https://openalex.org/W4396674388","https://openalex.org/W4399209607","https://openalex.org/W4401042929","https://openalex.org/W4402670893","https://openalex.org/W4402684073","https://openalex.org/W4405279206","https://openalex.org/W4411112932","https://openalex.org/W6600925709","https://openalex.org/W6678450939","https://openalex.org/W6682631176","https://openalex.org/W6742263385"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,48],"pipeline":[3],"for":[4],"synthetic":[5],"simplification":[6,108],"of":[7,96],"text":[8],"in":[9],"French":[10],"that":[11,52,101],"combines":[12],"large":[13],"language":[14],"models":[15],"with":[16],"structured":[17],"semantic":[18,68,105],"guidance.":[19],"Our":[20,83],"approach":[21],"enhances":[22],"data":[23],"generation":[24],"by":[25],"integrating":[26],"contextual":[27],"knowledge":[28,40],"from":[29],"Wikipedia":[30],"and":[31,34,57,65,72,93,104],"Vikidia":[32],"articles":[33],"injecting":[35],"symbolic":[36],"control":[37],"through":[38],"lightweight":[39],"graphs.":[41],"To":[42],"construct":[43],"document-level":[44],"representations,":[45],"we":[46],"implement":[47],"progressive":[49],"summarization":[50],"process":[51],"incrementally":[53],"builds":[54],"running":[55],"summaries":[56],"extracts":[58],"key":[59],"ideas.":[60],"Simplifications":[61],"are":[62],"generated":[63],"iteratively":[64],"assessed":[66],"using":[67,87],"comparisons":[69],"between":[70],"input":[71],"output":[73],"graphs,":[74],"enabling":[75],"targeted":[76],"regeneration":[77],"when":[78],"critical":[79],"information":[80],"is":[81,85],"lost.":[82],"system":[84],"implemented":[86],"LangChain\u2019s":[88],"orchestration":[89],"framework,":[90],"allowing":[91],"modular":[92],"extensible":[94],"coordination":[95],"LLM":[97],"components.":[98],"Evaluation":[99],"shows":[100],"context-aware":[102],"prompting":[103],"feedback":[106],"improve":[107],"quality":[109],"across":[110],"successive":[111],"iterations.":[112]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2025-10-10T00:00:00"}
