{"id":"https://openalex.org/W4393827062","doi":"https://doi.org/10.5281/zenodo.5837149","title":"ALEXSIS: A Dataset for Benchmarking Lexical Simplification for Spanish","display_name":"ALEXSIS: A Dataset for Benchmarking Lexical Simplification for Spanish","publication_year":2022,"publication_date":"2022-10-27","ids":{"openalex":"https://openalex.org/W4393827062","doi":"https://doi.org/10.5281/zenodo.5837149"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:5837149","is_oa":true,"landing_page_url":"https://zenodo.org/record/5837149","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/5837149","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078621336","display_name":"Daniel Ferr\u00e9s","orcid":"https://orcid.org/0000-0001-8861-3298"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Pompeu Fabra University","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Ferr\u00e9s, Daniel","raw_affiliation_strings":["LaSTuS-TALN (Universitat Pompeu Fabra)"],"affiliations":[{"raw_affiliation_string":"LaSTuS-TALN (Universitat Pompeu Fabra)","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046825099","display_name":"Horacio Saggion","orcid":"https://orcid.org/0000-0003-0016-7807"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Pompeu Fabra University","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Saggion, Horacio","raw_affiliation_strings":["LaSTuS-TALN (Universitat Pompeu Fabra)"],"affiliations":[{"raw_affiliation_string":"LaSTuS-TALN (Universitat Pompeu Fabra)","institution_ids":["https://openalex.org/I170486558"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5078621336"],"corresponding_institution_ids":["https://openalex.org/I170486558"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.9095189571380615},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5839702486991882},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5834854245185852},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4084721505641937},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3530140519142151},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.07915988564491272}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.9095189571380615},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5839702486991882},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5834854245185852},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4084721505641937},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3530140519142151},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.07915988564491272},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:5837149","is_oa":true,"landing_page_url":"https://zenodo.org/record/5837149","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.5837149","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.5837149","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:5837149","is_oa":true,"landing_page_url":"https://zenodo.org/record/5837149","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W4238897586","https://openalex.org/W435179959","https://openalex.org/W2619091065","https://openalex.org/W2059640416","https://openalex.org/W1490753184","https://openalex.org/W2284465472","https://openalex.org/W2291782699","https://openalex.org/W1993948687","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0,26,91,126,131,148,217,375],"ALEXSIS":[1,41,218,237,337,407,479],"Spanish":[2,219,432,464],"Dataset":[3,220,253,281],"for":[4,145,221,239,254,282,411,427,442,463,484,526,544,574,587],"Lexical":[5,222,255,283,335,347,354,443,481,560,607],"Simplification":[6,223,256,284,348,355,444,482,608,651],"contains":[7],"381":[8,54,475],"instances":[9,51,149,360,368,397,476],".":[10],"Each":[11],"instance":[12,70,87,160,376],"is":[13,29,67,82,129],"composed":[14],"by":[15],"a":[16,18,178,350,559],"sentence,":[17],"target":[19],"complex":[20,61,76,127,136,382],"word,":[21],"and":[22,248,264,291,301,366,408,413,431,465,487,499,516,554,603,618],"25":[23],"candidate":[24],"substitutions.":[25],"dataset":[27,238,351],"format":[28,153,273],"similar":[30,409],"to":[31,270,352],"that":[32,434],"of":[33,49,52,59,74,85,134,161,235,260,297,380,398,421,477,532,566,571,661,668],"LexMturk":[34,590],"(Horn":[35],"et":[36,459,468],"al.,":[37,460,469],"2014)":[38],"but":[39],"in":[40,63,78,88,143,154,257,285,341,357,363,371],"the":[42,53,60,64,75,79,83,86,135,140,146,151,162,236,243,261,271,298,342,364,372,381,390,395,399,419,422,455,474,478,533,567,572],"sentences":[43],"are":[44],"not":[45,385],"tokenized.":[46],"A":[47,252,280,416],"total":[48],"380":[50],"have":[55,150],"only":[56,68],"1":[57],"appearance":[58,133],"word":[62,77,128,137,383],"sentence.":[65,80],"There":[66],"one":[69,141],"with":[71,377,394,406,438,473],"two":[72,378,439],"appearances":[73,379],"This":[81],"case":[84],"line":[89],"263.":[90],"special":[92],"sentence":[93],"is:":[94],"\"Limita":[95],"al":[96,103,110,119],"norte":[97],"con":[98,105,112,121,213],"el":[99,106,122],"paraje":[100,107],"\u00c1rbol":[101],"Solo,":[102],"sur":[104],"San":[108,116,124],"Vicente,":[109],"este":[111],"la":[113,182],"localidad":[114],"de":[115,181,187,655,665],"Andr\u00e9s":[117],"y":[118,658],"oeste":[120],"Canal":[123],"Mart\u00edn.\"":[125],"\"paraje\".":[130],"first":[132],"\"paraje\"":[138],"was":[139,384],"marked":[142],"bold":[144],"annotators.":[147],"following":[152,244],"UTF8:":[155],"&lt;SENTENCE&gt;&lt;TAB&gt;&lt;COMPLEX_WORD_IN_SENTENCE&gt;&lt;TAB&gt;&lt;SUBSTITUTION_1&gt;&lt;TAB&gt;...&lt;TAB&gt;&lt;SUBSTITUTION_25&gt;":[156],"See":[157],"below":[158],"an":[159],"dataset.":[163,374,402,480],"________________________________":[164],"SAMPLE":[165],"INSTANCE":[166],"__________________________":[167],"Sufri\u00f3":[168],"una":[169],"importante":[170],"reducci\u00f3n":[171],"en":[172],"su":[173],"capacidad":[174],"para":[175,184],"poder":[176],"acogerse":[177,189],"las":[179],"normas":[180],"FIFA":[183],"los":[185],"estadios":[186],"f\u00fatbol.":[188],"adaptarse":[190,195,196,199,203,214],"sumarse":[191],"incorporarse":[192,206],"obedecer":[193],"apegarse":[194,200],"ampararse":[197,198],"aceptar":[201,210],"asimilarse":[202],"aplicarse":[204],"aceptarse":[205],"refugiarse":[207,211],"amparar":[208],"recurrir":[209],"cumplir":[212],"admitirse":[215],"__________________________________________________________________________":[216],"can":[224],"also":[225],"be":[226],"found":[227],"at":[228,448],"github:":[229],"https://github.com/LaSTUS-TALN-UPF/ALEXSIS":[230],"If":[231],"you":[232],"make":[233],"use":[234],"Spanish,":[240],"please":[241],"cite":[242],"paper:":[245],"Daniel":[246,290,491,631],"Ferr\u00e9s":[247,632],"Horacio":[249,500,635],"Saggion.<br>":[250,501],"ALEXSIS:":[251],"Spanish.<br>":[258,488],"Proceedings":[259,531,565],"Language":[262,299,317,507,619],"Resources":[263,300,318],"Evaluation":[265,302],"Conference":[266],"(LREC)":[267],"2022.":[268],"link":[269],"bibtex":[272],"file":[274],"[.bib]":[275],"<pre><code>@inproceedings{ferres-saggion@LREC2022,":[276],"title":[277],"=":[278,288,295,305,308,311,315,321,324],"\"ALEXSIS:":[279],"Spanish.\",":[286],"author":[287],"\"Ferr\u00e9s,":[289],"Saggion,":[292,514],"Horacio\",":[293],"booktitle":[294],"{Proceedings":[296],"Conference},":[303],"month":[304],"{June},":[306],"year":[307],"{2022},":[309],"address":[310],"{Marseille,":[312],"France},":[313],"publisher":[314],"{European":[316],"Association},":[319],"pages":[320,581],"{3582--3594},":[322],"url":[323],"{https://aclanthology.org/2022.lrec-1.383}":[325],"}":[326],"</code></pre>":[327],"<strong>RELATED":[328],"WORK</strong>":[329],"<strong>1)":[330],"TSAR-2022":[331,343,423],"Shared":[332,344,424],"Task":[333,345,425],"on":[334,346,536,610,615],"Simplification</strong>":[336],"has":[338,445],"been":[339,446],"used":[340,362,370],"as":[349],"evaluate":[353],"systems":[356,391],"Spanish.":[358],"12":[359],"were":[361,369,392,471],"trial-dataset":[365],"368":[367,396],"test":[373,401],"used.":[386],"In":[387,452,530,564,612],"this":[388,449,453],"evaluation":[389],"evaluated":[393,472],"TSAR-ES":[400],"https://github.com/LaSTUS-TALN-UPF/TSAR-2022-Shared-Task":[403],"<strong>2)":[404],"Experiments":[405],"datasets":[410,426],"English":[412],"Portuguese":[414,429],"(ALEXSIS-PT)</strong>":[415],"paper":[417,454],"describing":[418],"compilation":[420],"English,":[428,485],"(ALEXSIS-PT)":[430],"(ALEXSIS)":[433],"includes":[435],"several":[436],"experiments":[437],"state-of-the-art":[440],"approaches":[441,456],"published":[447],"link:":[450],"https://www.frontiersin.org/articles/10.3389/frai.2022.991242":[451],"((LSBert":[457],"(Qiang":[458],"2021)":[461],"adapted":[462],"TUNER":[466],"(Ferr\u00e9s":[467],"2017))":[470],"Benchmarks":[483],"Portuguese,":[486],"Sanja":[489],"\u0160tajner,":[490],"Ferr\u00e9s,":[492,512],"Matthew":[493],"Shardlow,":[494],"Kai":[495],"North,":[496],"Marcos":[497],"Zampieri":[498],"Front.":[502],"Artif.":[503],"Intell.":[504],"Sec.":[505],"Natural":[506],"Processing.<br>":[508],"doi:":[509,548,626],"10.3389/frai.2022.991242":[510],"<strong>REFERENCES</strong>":[511],"D.,":[513],"H.,":[515],"G\u00f3mez":[517],"Guinovart,":[518],"X.":[519,604],"(2017b).<br>":[520],"An":[521],"adaptable":[522],"lexical":[523],"simplification":[524],"architecture":[525],"Major":[527],"Ibero-Romance":[528],"languages.<br>":[529],"First":[534],"Workshop":[535],"Building":[537],"Linguistically":[538],"Generalizable":[539],"NLP":[540],"Systems":[541],"(Copenhagen:":[542],"Association":[543,573,586],"Computational":[545,575,588],"Linguistics),":[546],"40\u201347.<br>":[547],"10.18653/v1/W17-5406":[549],"Horn,":[550],"C.,":[551,553],"Manduca,":[552],"Kauchak,":[555],"D.":[556],"(2014).<br>":[557],"Learning":[558],"Simplifier":[561],"Using":[562],"Wikipedia.<br>":[563],"52nd":[568],"Annual":[569],"Meeting":[570],"Linguistics":[576],"(Volume":[577],"2:":[578],"Short":[579],"Papers),":[580],"458\u2013463,":[582],"Baltimore,":[583],"Maryland,":[584],"June.":[585],"Linguistics.":[589],"dataset:":[591],"https://cs.pomona.edu/~dkauchak/simplification/lex.mturk.14/lex.mturk.14.tar.gz":[592],"J.":[593],"Qiang,":[594],"Y.":[595,597,599,601],"Li,":[596],"Zhu,":[598],"Yuan,":[600],"Shi":[602],"Wu.<br>":[605],"LSBert:":[606],"Based":[609],"BER.<br>":[611],"IEEE/ACM":[613],"Transactions":[614],"Audio,":[616],"Speech,":[617],"Processing,":[620],"vol.":[621],"29,":[622],"pp.":[623],"3064-3076,":[624],"2021<br>":[625],"10.1109/TASLP.2021.3111589.":[627],"<strong>CONTACT</strong>":[628],"LaSTUS":[629],"lab@TALN@UPF":[630],"-":[633,637],"daniel.ferres[at]upf.edu":[634],"Saggion":[636],"horacio.saggion[at]upf.edu":[638],"(corresponding":[639],"author)":[640],"ConMuTeS":[641,646],"project":[642],"Link:":[643],"https://www.upf.edu/web/conmutes":[644],"<strong>ACKNOWLEDGEMENTS</strong>":[645],"project:":[647],"Context-aware":[648],"Multilingual":[649],"Text":[650],"(ConMuTeS)":[652],"PID2019-109066GB-I00/AEI/10.13039/501100011033":[653],"Ministerio":[654],"Ciencia,":[656],"Innovaci\u00f3n":[657],"Universidades":[659],"(MCIU)":[660],"Spain":[662,669],"Agencia":[663],"Estatal":[664],"Investigaci\u00f3n":[666],"(AEI)":[667]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
