{"id":"https://openalex.org/W7153347728","doi":"https://doi.org/10.48550/arxiv.2604.08156","title":"Training Data Size Sensitivity in Unsupervised Rhyme Recognition","display_name":"Training Data Size Sensitivity in Unsupervised Rhyme Recognition","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7153347728","doi":"https://doi.org/10.48550/arxiv.2604.08156"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.08156","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08156","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.08156","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081539497","display_name":"Petr Plech\u00e1\u010d","orcid":"https://orcid.org/0000-0002-1003-4541"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Plech\u00e1\u010d, Petr","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000260754","display_name":"Artjoms \u0160e\u013ca","orcid":"https://orcid.org/0000-0002-2272-2077"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"\u0160e\u013ca, Artjoms","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043569790","display_name":"Silvie Cinkov\u00e1","orcid":"https://orcid.org/0000-0003-4526-3915"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cinkov\u00e1, Silvie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089887007","display_name":"Mirella De Sisto","orcid":"https://orcid.org/0000-0002-0899-5976"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"De Sisto, Mirella","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059003169","display_name":"Lara Nugues","orcid":"https://orcid.org/0000-0003-1381-8090"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nugues, Lara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071563350","display_name":"Ne\u017ea Ko\u010dnik","orcid":"https://orcid.org/0009-0003-8318-2179"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ko\u010dnik, Ne\u017ea","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027843774","display_name":"Antonina Martynenko","orcid":"https://orcid.org/0000-0003-2892-9076"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Martynenko, Antonina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133327604","display_name":"Ben Nagy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nagy, Ben","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133368433","display_name":"Luca Giovannini","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Giovannini, Luca","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5010868327","display_name":"Robert Kol\u00e1\u0159","orcid":"https://orcid.org/0000-0001-8061-1917"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kol\u00e1r, Robert","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5081539497"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.15559999644756317,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11574","display_name":"Artificial Intelligence in Games","score":0.15559999644756317,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.13410000503063202,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11148","display_name":"Language, Metaphor, and Cognition","score":0.08969999849796295,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rhyme","display_name":"Rhyme","score":0.9563000202178955},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.574999988079071},{"id":"https://openalex.org/keywords/affect","display_name":"Affect (linguistics)","score":0.5595999956130981},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5073000192642212},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4749000072479248},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.45890000462532043},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3928000032901764}],"concepts":[{"id":"https://openalex.org/C2777231075","wikidata":"https://www.wikidata.org/wiki/Q178715","display_name":"Rhyme","level":3,"score":0.9563000202178955},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.574999988079071},{"id":"https://openalex.org/C2776035688","wikidata":"https://www.wikidata.org/wiki/Q1606558","display_name":"Affect (linguistics)","level":2,"score":0.5595999956130981},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.550599992275238},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5461000204086304},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5414999723434448},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5073000192642212},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4749000072479248},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.45890000462532043},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4406000077724457},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3928000032901764},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.38850000500679016},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.382099986076355},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3806000053882599},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3483999967575073},{"id":"https://openalex.org/C74672266","wikidata":"https://www.wikidata.org/wiki/Q815859","display_name":"Language acquisition","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.3140999972820282},{"id":"https://openalex.org/C137584468","wikidata":"https://www.wikidata.org/wiki/Q35395","display_name":"Phonetics","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C2780586970","wikidata":"https://www.wikidata.org/wiki/Q1357284","display_name":"Popularity","level":2,"score":0.25380000472068787},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.08156","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08156","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.08156","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08156","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7995907664299011,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Rhyme":[0],"is":[1,5,7,11,48],"deceptively":[2],"intuitive:":[3],"what":[4],"or":[6,28],"not":[8],"a":[9,57,96,105,132,144],"rhyme":[10,17,53],"constructed":[12],"historically,":[13],"scholars":[14],"struggle":[15,169],"with":[16,154,170],"classification,":[18],"and":[19,35,83,89,111,125],"people":[20],"disagree":[21],"on":[22,64,104],"whether":[23],"two":[24],"words":[25,124],"are":[26],"rhymed":[27,33],"not.":[29],"This":[30,41],"complicates":[31],"automated":[32],"recognition":[34,54],"evaluation,":[36],"especially":[37],"in":[38,67,117,131],"multilingual":[39],"context.":[40],"article":[42],"investigates":[43],"how":[44,86],"much":[45],"training":[46,87,156],"data":[47],"needed":[49],"for":[50],"reliable":[51],"unsupervised":[52],"using":[55,143],"RhymeTagger,":[56],"language-independent":[58],"tool":[59],"that":[60],"identifies":[61],"rhymes":[62],"based":[63],"repeating":[65],"patterns":[66],"poetry":[68],"corpora.":[69],"We":[70,134],"evaluate":[71],"its":[72],"performance":[73,98],"across":[74],"seven":[75],"languages":[76],"(Czech,":[77],"German,":[78],"English,":[79],"French,":[80],"Italian,":[81],"Russian,":[82],"Slovene),":[84],"examining":[85],"size":[88],"language":[90,141],"differences":[91],"affect":[92],"accuracy.":[93],"To":[94],"set":[95],"realistic":[97],"benchmark,":[99],"we":[100],"assess":[101],"inter-annotator":[102],"agreement":[103],"manually":[106],"annotated":[107],"subset":[108],"of":[109],"poems":[110],"analyze":[112],"factors":[113],"contributing":[114],"to":[115,138],"disagreement":[116],"expert":[118],"annotations:":[119],"phonetic":[120,166],"similarity":[121],"between":[122],"rhyming":[123],"their":[126],"distance":[127],"from":[128],"each":[129],"other":[130],"poem.":[133],"also":[135],"compare":[136],"RhymeTagger":[137,158],"three":[139],"large":[140],"models":[142],"one-shot":[145],"learning":[146],"strategy.":[147],"Our":[148],"findings":[149],"show":[150],"that,":[151],"once":[152],"provided":[153],"sufficient":[155],"data,":[157],"consistently":[159],"outperforms":[160],"human":[161],"agreement,":[162],"while":[163],"LLMs":[164],"lacking":[165],"representation":[167],"significantly":[168],"the":[171],"task.":[172]},"counts_by_year":[],"updated_date":"2026-05-04T08:30:34.212998","created_date":"2026-04-11T00:00:00"}
