{"id":"https://openalex.org/W7148478047","doi":"https://doi.org/10.48550/arxiv.2604.00019","title":"The Chronicles of RiDiC: Generating Datasets with Controlled Popularity Distribution for Long-form Factuality Evaluation","display_name":"The Chronicles of RiDiC: Generating Datasets with Controlled Popularity Distribution for Long-form Factuality Evaluation","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7148478047","doi":"https://doi.org/10.48550/arxiv.2604.00019"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.00019","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00019","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.00019","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061734661","display_name":"Pavel Braslavski","orcid":"https://orcid.org/0000-0002-6964-458X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Braslavski, Pavel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025368481","display_name":"Dmitrii Iarosh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Iarosh, Dmitrii","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116225139","display_name":"Nikita Sushko","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sushko, Nikita","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018477452","display_name":"Andrey Sakhovskiy","orcid":"https://orcid.org/0000-0003-2762-2910"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sakhovskiy, Andrey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080930394","display_name":"Vasily Konovalov","orcid":"https://orcid.org/0000-0002-4745-4718"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Konovalov, Vasily","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132791965","display_name":"Elena Tutubalina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tutubalina, Elena","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132788772","display_name":"Alexander Panchenko","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Panchenko, Alexander","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5061734661"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4300999939441681,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4300999939441681,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2718999981880188,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12478","display_name":"Wikis in Education and Collaboration","score":0.12060000002384186,"subfield":{"id":"https://openalex.org/subfields/3315","display_name":"Communication"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.8217999935150146},{"id":"https://openalex.org/keywords/popularity","display_name":"Popularity","score":0.8199999928474426},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6949999928474426},{"id":"https://openalex.org/keywords/distribution","display_name":"Distribution (mathematics)","score":0.424699991941452},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.3944999873638153},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3441999852657318}],"concepts":[{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.8217999935150146},{"id":"https://openalex.org/C2780586970","wikidata":"https://www.wikidata.org/wiki/Q1357284","display_name":"Popularity","level":2,"score":0.8199999928474426},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7782999873161316},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6949999928474426},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6075999736785889},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5885999798774719},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.48429998755455017},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.424699991941452},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3944999873638153},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3441999852657318},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C53605480","wikidata":"https://www.wikidata.org/wiki/Q852595","display_name":"Geotagging","level":2,"score":0.31200000643730164},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.29499998688697815},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2863999903202057},{"id":"https://openalex.org/C2777889803","wikidata":"https://www.wikidata.org/wiki/Q25047676","display_name":"Named entity","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2628999948501587}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.00019","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00019","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.00019","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00019","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/13","score":0.6970012784004211,"display_name":"Climate action"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,47],"present":[1,48],"a":[2,123],"configurable":[3],"pipeline":[4],"for":[5,31],"generating":[6],"multilingual":[7],"sets":[8],"of":[9,35,55,144],"entities":[10,61,108,130],"with":[11],"specified":[12],"characteristics,":[13],"such":[14],"as":[15,52],"domain,":[16],"geographical":[17,83],"location":[18],"and":[19,25,69,86,91,94,116,154],"popularity,":[20],"using":[21,122],"data":[22],"from":[23,62,111,131],"Wikipedia":[24,96],"Wikidata.":[26],"These":[27,118],"datasets":[28],"are":[29],"intended":[30],"evaluating":[32],"the":[33,49,142,151],"factuality":[34,125,147],"LLMs'":[36,103,145],"long-form":[37,146],"generation,":[38],"thereby":[39],"complementing":[40],"evaluation":[41,143],"based":[42],"on":[43],"short-form":[44],"QA":[45],"datasets.":[46],"RiDiC":[50,58,107],"dataset":[51,133],"an":[53],"example":[54],"this":[56],"approach.":[57],"contains":[59],"3,000":[60],"three":[63,112],"domains":[64],"--":[65,72],"rivers,":[66],"natural":[67],"disasters,":[68],"car":[70],"models":[71,137],"spanning":[73],"different":[74],"popularity":[75],"tiers.":[76],"Each":[77],"entity":[78],"is":[79,99],"accompanied":[80],"by":[81],"its":[82],"location,":[84],"English":[85,93,115],"Chinese":[87,95],"names":[88],"(if":[89],"available)":[90],"relevant":[92],"content,":[97],"which":[98,127],"used":[100],"to":[101,138],"evaluate":[102],"responses.":[104],"Generations":[105],"about":[106],"were":[109,119],"obtained":[110],"LLMs":[113],"in":[114,148],"Chinese.":[117],"then":[120],"evaluated":[121],"third-party":[124],"checker,":[126],"showed":[128],"that":[129],"our":[132],"caused":[134],"even":[135],"frontier":[136],"hallucinate.":[139],"To":[140],"facilitate":[141],"multiple":[149],"languages,":[150],"code,":[152],"data,":[153],"generation/evaluation":[155],"scripts":[156],"have":[157],"been":[158],"released.":[159]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
