{"id":"https://openalex.org/W4292117441","doi":"https://doi.org/10.1007/s10579-022-09609-0","title":"RastrOS Project: Natural Language Processing contributions to the development of an eye-tracking corpus with predictability norms for Brazilian Portuguese","display_name":"RastrOS Project: Natural Language Processing contributions to the development of an eye-tracking corpus with predictability norms for Brazilian Portuguese","publication_year":2022,"publication_date":"2022-08-17","ids":{"openalex":"https://openalex.org/W4292117441","doi":"https://doi.org/10.1007/s10579-022-09609-0","pmid":"https://pubmed.ncbi.nlm.nih.gov/35990365"},"language":"en","primary_location":{"id":"doi:10.1007/s10579-022-09609-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10579-022-09609-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10579-022-09609-0.pdf","source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10579-022-09609-0.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056679551","display_name":"Sidney Evaldo Leal","orcid":"https://orcid.org/0000-0002-8817-2063"},"institutions":[{"id":"https://openalex.org/I17974374","display_name":"Universidade de S\u00e3o Paulo","ror":"https://ror.org/036rp1748","country_code":"BR","type":"education","lineage":["https://openalex.org/I17974374"]},{"id":"https://openalex.org/I4210131883","display_name":"Brazilian Society of Computational and Applied Mathematics","ror":"https://ror.org/03kcw4w74","country_code":"BR","type":"other","lineage":["https://openalex.org/I4210131883"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Sidney Evaldo Leal","raw_affiliation_strings":["Instituto de Ci\u00eancias Matem\u00e1ticas e de Computa\u00e7\u00e3o - University of S\u00e3o Paulo, S\u00e3o Paulo, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-8817-2063","affiliations":[{"raw_affiliation_string":"Instituto de Ci\u00eancias Matem\u00e1ticas e de Computa\u00e7\u00e3o - University of S\u00e3o Paulo, S\u00e3o Paulo, Brazil","institution_ids":["https://openalex.org/I4210131883","https://openalex.org/I17974374"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064157902","display_name":"Kate\u0159ina Luk\u00e1\u0161ov\u00e1","orcid":"https://orcid.org/0000-0002-1137-7298"},"institutions":[{"id":"https://openalex.org/I71715416","display_name":"Universidade Federal do ABC","ror":"https://ror.org/028kg9j04","country_code":"BR","type":"education","lineage":["https://openalex.org/I71715416"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Katerina Lukasova","raw_affiliation_strings":["Center of Mathematics, Computing and Cognition, Federal University of ABC, S\u00e3o Paulo, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-1137-7298","affiliations":[{"raw_affiliation_string":"Center of Mathematics, Computing and Cognition, Federal University of ABC, S\u00e3o Paulo, Brazil","institution_ids":["https://openalex.org/I71715416"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038305638","display_name":"Maria Teresa Carthery\u2010Goulart","orcid":"https://orcid.org/0000-0002-2751-4541"},"institutions":[{"id":"https://openalex.org/I71715416","display_name":"Universidade Federal do ABC","ror":"https://ror.org/028kg9j04","country_code":"BR","type":"education","lineage":["https://openalex.org/I71715416"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Maria Teresa Carthery-Goulart","raw_affiliation_strings":["Center of Mathematics, Computing and Cognition, Federal University of ABC, S\u00e3o Paulo, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-2751-4541","affiliations":[{"raw_affiliation_string":"Center of Mathematics, Computing and Cognition, Federal University of ABC, S\u00e3o Paulo, Brazil","institution_ids":["https://openalex.org/I71715416"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059202822","display_name":"Sandra Maria Alu\u00edsio","orcid":"https://orcid.org/0000-0001-5108-2630"},"institutions":[{"id":"https://openalex.org/I17974374","display_name":"Universidade de S\u00e3o Paulo","ror":"https://ror.org/036rp1748","country_code":"BR","type":"education","lineage":["https://openalex.org/I17974374"]},{"id":"https://openalex.org/I4210131883","display_name":"Brazilian Society of Computational and Applied Mathematics","ror":"https://ror.org/03kcw4w74","country_code":"BR","type":"other","lineage":["https://openalex.org/I4210131883"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Sandra Maria Alu\u00edsio","raw_affiliation_strings":["Instituto de Ci\u00eancias Matem\u00e1ticas e de Computa\u00e7\u00e3o - University of S\u00e3o Paulo, S\u00e3o Paulo, Brazil"],"raw_orcid":"https://orcid.org/0000-0001-5108-2630","affiliations":[{"raw_affiliation_string":"Instituto de Ci\u00eancias Matem\u00e1ticas e de Computa\u00e7\u00e3o - University of S\u00e3o Paulo, S\u00e3o Paulo, Brazil","institution_ids":["https://openalex.org/I4210131883","https://openalex.org/I17974374"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5056679551"],"corresponding_institution_ids":["https://openalex.org/I17974374","https://openalex.org/I4210131883"],"apc_list":null,"apc_paid":null,"fwci":1.1098,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.81501199,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"56","issue":"4","first_page":"1333","last_page":"1372"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/predictability","display_name":"Predictability","score":0.8942973613739014},{"id":"https://openalex.org/keywords/portuguese","display_name":"Portuguese","score":0.8902720212936401},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.5150918364524841},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.49141108989715576},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.49072325229644775},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.488619863986969},{"id":"https://openalex.org/keywords/brazilian-portuguese","display_name":"Brazilian Portuguese","score":0.48454156517982483},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45659953355789185},{"id":"https://openalex.org/keywords/eye-tracking","display_name":"Eye tracking","score":0.4442400634288788},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.40237125754356384},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.24758660793304443},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.18913203477859497},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.10019075870513916},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0711519718170166},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.0625375509262085},{"id":"https://openalex.org/keywords/pedagogy","display_name":"Pedagogy","score":0.04636567831039429}],"concepts":[{"id":"https://openalex.org/C197640229","wikidata":"https://www.wikidata.org/wiki/Q2534066","display_name":"Predictability","level":2,"score":0.8942973613739014},{"id":"https://openalex.org/C35219183","wikidata":"https://www.wikidata.org/wiki/Q5146","display_name":"Portuguese","level":2,"score":0.8902720212936401},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.5150918364524841},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.49141108989715576},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49072325229644775},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.488619863986969},{"id":"https://openalex.org/C2778880076","wikidata":"https://www.wikidata.org/wiki/Q750553","display_name":"Brazilian Portuguese","level":3,"score":0.48454156517982483},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45659953355789185},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.4442400634288788},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.40237125754356384},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.24758660793304443},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.18913203477859497},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.10019075870513916},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0711519718170166},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0625375509262085},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.04636567831039429},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1007/s10579-022-09609-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10579-022-09609-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10579-022-09609-0.pdf","source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},{"id":"pmid:35990365","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35990365","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language resources and evaluation","raw_type":null},{"id":"pmh:oai:hub.hku.hk:10722/357104","is_oa":false,"landing_page_url":"https://hub.hku.hk/handle/10722/357104","pdf_url":null,"source":{"id":"https://openalex.org/S4377196271","display_name":"The HKU Scholars Hub (University of Hong Kong)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I889458895","host_organization_name":"University of Hong Kong","host_organization_lineage":["https://openalex.org/I889458895"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"pmh:oai:pubmedcentral.nih.gov:9383681","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/9383681","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Lang Resour Eval","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1007/s10579-022-09609-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10579-022-09609-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10579-022-09609-0.pdf","source":{"id":"https://openalex.org/S4306424877","display_name":"Language Resources and Evaluation","issn_l":"1574-020X","issn":["1574-020X","1574-0218"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Language Resources and Evaluation","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.5199999809265137}],"awards":[{"id":"https://openalex.org/G295657101","display_name":null,"funder_award_id":"2019/","funder_id":"https://openalex.org/F4320320997","funder_display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo"},{"id":"https://openalex.org/G4121375769","display_name":null,"funder_award_id":"Grant Number 2019/09807-0","funder_id":"https://openalex.org/F4320320997","funder_display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo"}],"funders":[{"id":"https://openalex.org/F4320320997","display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado de S\u00e3o Paulo","ror":"https://ror.org/02ddkpn78"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4292117441.pdf","grobid_xml":"https://content.openalex.org/works/W4292117441.grobid-xml"},"referenced_works_count":45,"referenced_works":["https://openalex.org/W54787769","https://openalex.org/W1614298861","https://openalex.org/W1673310716","https://openalex.org/W1975311598","https://openalex.org/W2012575176","https://openalex.org/W2015742814","https://openalex.org/W2030330674","https://openalex.org/W2037073318","https://openalex.org/W2057458773","https://openalex.org/W2063202923","https://openalex.org/W2067359214","https://openalex.org/W2079597242","https://openalex.org/W2094139727","https://openalex.org/W2107484888","https://openalex.org/W2124059530","https://openalex.org/W2128388135","https://openalex.org/W2139450036","https://openalex.org/W2153804780","https://openalex.org/W2250613244","https://openalex.org/W2289364919","https://openalex.org/W2346139862","https://openalex.org/W2394756230","https://openalex.org/W2397016496","https://openalex.org/W2470606929","https://openalex.org/W2493916176","https://openalex.org/W2496780162","https://openalex.org/W2616278914","https://openalex.org/W2740924709","https://openalex.org/W2752448390","https://openalex.org/W2760692555","https://openalex.org/W2787873964","https://openalex.org/W2788816349","https://openalex.org/W2790159056","https://openalex.org/W2806758205","https://openalex.org/W2819344641","https://openalex.org/W2963341956","https://openalex.org/W2964194677","https://openalex.org/W3029439886","https://openalex.org/W3096266342","https://openalex.org/W3117229574","https://openalex.org/W3192744132","https://openalex.org/W3196896018","https://openalex.org/W4231741839","https://openalex.org/W4242390622","https://openalex.org/W6676550054"],"related_works":["https://openalex.org/W2899664355","https://openalex.org/W2306266016","https://openalex.org/W4254443907","https://openalex.org/W2886753397","https://openalex.org/W4377968335","https://openalex.org/W2544314260","https://openalex.org/W4387674232","https://openalex.org/W340238179","https://openalex.org/W174271798","https://openalex.org/W3118496204"],"abstract_inverted_index":{"This":[0],"article":[1,27],"presents":[2],"RastrOS,":[3],"a":[4],"new":[5],"eye-tracking":[6,151],"corpus":[7,33,77],"of":[8,18,20,31,57,147,153],"eye":[9],"movement":[10],"data":[11,152],"from":[12,79,131],"university":[13],"students":[14],"during":[15],"silent":[16],"reading":[17],"paragraphs":[19,78],"texts":[21],"in":[22,48,158],"Brazilian":[23],"Portuguese":[24],"(BP).":[25],"The":[26],"shows":[28],"the":[29,32,43,55,65,71,76,89,93,102,107,132,138,159],"potential":[30],"for":[34,91,100,162],"natural":[35],"language":[36],"processing":[37],"(NLP)":[38],"using":[39,82],"it":[40,51],"to":[41,63,74,122],"evaluate":[42],"sentence":[44],"complexity":[45],"prediction":[46],"task":[47],"BP":[49],"and":[50,60,85,105,117,150],"also":[52,98],"focuses":[53],"on":[54,113],"description":[56],"NLP":[58],"resources":[59],"methods":[61],"developed":[62],"create":[64],"corpus.":[66],"Specifically,":[67],"we":[68],"present:":[69],"(i)":[70],"method":[72],"used":[73,121],"select":[75],"large":[80],"corpora,":[81],"linguistic":[83],"metrics":[84],"clustering":[86],"algorithms;":[87],"(ii)":[88],"platform":[90],"collecting":[92],"Cloze":[94],"test,":[95],"which":[96],"is":[97],"responsible":[99],"creating":[101],"project":[103],"datasets,":[104],"(iii)":[106],"hybrid":[108],"semantic":[109,124],"similarity":[110],"method,":[111],"based":[112],"word":[114,119],"embedding":[115],"models":[116],"contextualised":[118],"representations,":[120],"generate":[123],"predictability":[125,145],"norms.":[126],"RastrOS":[127],"can":[128],"be":[129],"downloaded":[130],"open":[133],"science":[134],"framework":[135],"repository":[136,161],"with":[137,144],"computational":[139],"infrastructure":[140],"mentioned":[141],"above.":[142],"Datasets":[143],"norms":[146],"393":[148],"participants":[149,155],"37":[154],"are":[156],"available":[157],"OSF":[160],"this":[163],"work":[164],"(https://osf.io/9jxg3/).":[165]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-01-13T01:12:25.745995","created_date":"2025-10-10T00:00:00"}
