{"id":"https://openalex.org/W7133598895","doi":"https://doi.org/10.48550/arxiv.2603.03543","title":"Tucano 2 Cool: Better Open Source LLMs for Portuguese","display_name":"Tucano 2 Cool: Better Open Source LLMs for Portuguese","publication_year":2026,"publication_date":"2026-03-03","ids":{"openalex":"https://openalex.org/W7133598895","doi":"https://doi.org/10.48550/arxiv.2603.03543"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.03543","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076814021","display_name":"Nicholas Kluge Corr\u00eaa","orcid":"https://orcid.org/0000-0002-5633-6094"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Corr\u00eaa, Nicholas Kluge","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102662770","display_name":"Aniket Sen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sen, Aniket","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108928027","display_name":"Shiza Fatimah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fatimah, Shiza","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034330265","display_name":"S. Falk","orcid":"https://orcid.org/0009-0004-1369-7705"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Falk, Sophia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128153700","display_name":"Lennard Landgraf","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Landgraf, Lennard","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128141547","display_name":"Julia Kastner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kastner, Julia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128135447","display_name":"Lucie Flek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Flek, Lucie","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5076814021"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3075999915599823,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3075999915599823,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2451000064611435,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.10109999775886536,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.8928999900817871},{"id":"https://openalex.org/keywords/portuguese","display_name":"Portuguese","score":0.7843000292778015},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.498199999332428},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.489300012588501},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.41530001163482666}],"concepts":[{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.8928999900817871},{"id":"https://openalex.org/C35219183","wikidata":"https://www.wikidata.org/wiki/Q5146","display_name":"Portuguese","level":2,"score":0.7843000292778015},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5188999772071838},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.498199999332428},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.489300012588501},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.41530001163482666},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.34869998693466187},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3463999927043915},{"id":"https://openalex.org/C2778880076","wikidata":"https://www.wikidata.org/wiki/Q750553","display_name":"Brazilian Portuguese","level":3,"score":0.29649999737739563},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.26899999380111694},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2660999894142151},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.263700008392334},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25949999690055847},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2556000053882599}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.03543","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.03543","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.03543","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.03543","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.47343456745147705,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,125],"present":[1],"Tucano":[2,109,159],"2,":[3],"a":[4,39,49,139],"fully":[5],"open":[6],"suite":[7,111,142],"of":[8,42,93],"large":[9],"language":[10],"models":[11],"(LLMs)":[12],"with":[13,158],"0.5-3.7":[14],"billion":[15],"parameters,":[16],"designed":[17],"to":[18,38,75],"address":[19],"certain":[20],"gaps":[21,59],"in":[22,60,78,134],"open-source":[23],"development":[24],"for":[25,107],"Portuguese":[26,73,183],"LLMs.":[27],"Following":[28],"our":[29,35,135,173],"previous":[30],"works,":[31],"we":[32,99],"now":[33],"extend":[34,127],"dataset,":[36,52],"GigaVerbo-v2,":[37,61],"new":[40,50],"degree":[41],"quality":[43],"and":[44,62,68,89,103,114,128,152,168,178],"scale,":[45],"while":[46],"also":[47,126],"introducing":[48],"synthetic":[51],"GigaVerbo-v2":[53,66,69],"Synth,":[54],"aimed":[55],"at":[56],"filling":[57],"missing":[58],"two":[63],"post-training":[64,153],"datasets,":[65],"SFT":[67],"Preferences,":[70],"that":[71,143,172],"allow":[72],"LLMs":[74],"be":[76],"trained":[77],"domains":[79,92],"like":[80],"retrieval":[81],"augmented":[82],"generation,":[83],"coding,":[84],"tool":[85],"use,":[86],"chain-of-thought":[87],"reasoning,":[88],"many":[90],"other":[91],"interest.":[94],"Through":[95],"extensive":[96],"ablation":[97],"studies,":[98],"design":[100],"both":[101],"pretraining":[102,105],"continual":[104,150],"recipes":[106],"the":[108,130,181],"2":[110,160],"(Base,":[112],"Instruct,":[113],"Think),":[115],"which":[116],"achieve":[117],"state-of-the-art":[118],"performance":[119],"on":[120],"several":[121],"Portuguese-language":[122],"modeling":[123],"benchmarks.":[124],"refine":[129],"evaluation":[131,141],"harness":[132],"introduced":[133],"earlier":[136],"work,":[137],"yielding":[138],"comprehensive":[140],"provides":[144],"strong":[145],"signals":[146],"across":[147],"different":[148],"pretraining,":[149,151],"regimes.":[154],"All":[155],"artifacts":[156],"associated":[157],"are":[161],"openly":[162],"released,":[163],"including":[164],"training":[165],"recipes,":[166],"logs,":[167],"source":[169],"code,":[170],"ensuring":[171],"work":[174],"is":[175],"reproducible,":[176],"accessible,":[177],"extendable":[179],"by":[180],"broader":[182],"NLP":[184],"community.":[185]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-06T00:00:00"}
