{"id":"https://openalex.org/W7082980007","doi":"https://doi.org/10.5281/zenodo.17185408","title":"PreprintToPaper dataset","display_name":"PreprintToPaper dataset","publication_year":2025,"publication_date":"2025-09-23","ids":{"openalex":"https://openalex.org/W7082980007","doi":"https://doi.org/10.5281/zenodo.17185408"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.17185408","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17185408","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.17185408","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Badalova, Fidan","orcid":"https://orcid.org/0009-0005-3525-0717"},"institutions":[{"id":"https://openalex.org/I4210101898","display_name":"GESIS - Leibniz Institute for the Social Sciences","ror":"https://ror.org/018afyw53","country_code":"DE","type":"facility","lineage":["https://openalex.org/I315704651","https://openalex.org/I4210101898"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Badalova, Fidan","raw_affiliation_strings":["GESIS - Leibniz-Institute for the Social Sciences"],"raw_orcid":"https://orcid.org/0009-0005-3525-0717","affiliations":[{"raw_affiliation_string":"GESIS - Leibniz-Institute for the Social Sciences","institution_ids":["https://openalex.org/I4210101898"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mayr, Philipp","orcid":"https://orcid.org/0000-0002-6656-1658"},"institutions":[{"id":"https://openalex.org/I4210101898","display_name":"GESIS - Leibniz Institute for the Social Sciences","ror":"https://ror.org/018afyw53","country_code":"DE","type":"facility","lineage":["https://openalex.org/I315704651","https://openalex.org/I4210101898"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Mayr, Philipp","raw_affiliation_strings":["GESIS Leibniz-Institut fur Sozialwissenschaften in Koln"],"raw_orcid":"https://orcid.org/0000-0002-6656-1658","affiliations":[{"raw_affiliation_string":"GESIS Leibniz-Institut fur Sozialwissenschaften in Koln","institution_ids":["https://openalex.org/I4210101898"]}]},{"author_position":"last","author":{"id":null,"display_name":"Sienkiewicz, Julian","orcid":"https://orcid.org/0000-0003-2097-1499"},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Sienkiewicz, Julian","raw_affiliation_strings":["Warsaw University of Technology"],"raw_orcid":"https://orcid.org/0000-0003-2097-1499","affiliations":[{"raw_affiliation_string":"Warsaw University of Technology","institution_ids":["https://openalex.org/I108403487"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11290","display_name":"Preterm Birth and Chorioamnionitis","score":0.1941000074148178,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11290","display_name":"Preterm Birth and Chorioamnionitis","score":0.1941000074148178,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10978","display_name":"Prenatal Screening and Diagnostics","score":0.14880000054836273,"subfield":{"id":"https://openalex.org/subfields/2735","display_name":"Pediatrics, Perinatology and Child Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11589","display_name":"Gynecological conditions and treatments","score":0.0820000022649765,"subfield":{"id":"https://openalex.org/subfields/2729","display_name":"Obstetrics and Gynecology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8568000197410583},{"id":"https://openalex.org/keywords/preprint","display_name":"Preprint","score":0.5062999725341797},{"id":"https://openalex.org/keywords/subject","display_name":"Subject (documents)","score":0.4778999984264374},{"id":"https://openalex.org/keywords/coronavirus-disease-2019","display_name":"Coronavirus disease 2019 (COVID-19)","score":0.41519999504089355},{"id":"https://openalex.org/keywords/upload","display_name":"Upload","score":0.3797999918460846},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.2842999994754791}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8568000197410583},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6406000256538391},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5622000098228455},{"id":"https://openalex.org/C43169469","wikidata":"https://www.wikidata.org/wiki/Q580922","display_name":"Preprint","level":2,"score":0.5062999725341797},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.4778999984264374},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4677000045776367},{"id":"https://openalex.org/C3008058167","wikidata":"https://www.wikidata.org/wiki/Q84263196","display_name":"Coronavirus disease 2019 (COVID-19)","level":4,"score":0.41519999504089355},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.3797999918460846},{"id":"https://openalex.org/C161191863","wikidata":"https://www.wikidata.org/wiki/Q199655","display_name":"Library science","level":1,"score":0.3052999973297119},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C3007834351","wikidata":"https://www.wikidata.org/wiki/Q82069695","display_name":"Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)","level":5,"score":0.2736000120639801},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C2781291010","wikidata":"https://www.wikidata.org/wiki/Q178580","display_name":"Period (music)","level":2,"score":0.2669999897480011}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.17185408","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17185408","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.5281/zenodo.17185408","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.17185408","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/3","display_name":"Good health and well-being","score":0.4417790174484253}],"awards":[{"id":"https://openalex.org/G6476165626","display_name":null,"funder_award_id":"101086321","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"PreprintToPaper":[0,25,142],"dataset:":[1],"connecting":[2],"bioRxiv":[3,42,64,217,308,393,534,615,629,689,794,822,1041],"preprints":[4,9,15,43,48,97,188,315,320,441,449,691,703,912,951],"with":[5,340,419,692,832,913,952,1080,1090,1112],"journal":[6,467],"publications":[7,197,306,468],"Some":[8],"get":[10],"published,":[11,377],"while":[12,1110],"others":[13],"remain":[14,471],"without":[16,429],"ever":[17],"being":[18],"formally":[19],"published":[20,100,173,232,396,407,674,695,739,798,827,849,867,972,1049,1061],"in":[21,51,57,72,150,307,414,438,704,717,748,787,870,957,977,990,995,1006,1075,1100],"a":[22,52,160,236,334,341,395,430,641,709,833,860,879,895,919,1059,1091,1113],"journal.":[23],"The":[24,54,75,91,115,141,314,370,456,651,678,925,940,986],"dataset":[26,55,76,92,143,184,203,371,457,1017],"is":[27,181,204,884,898,927],"the":[28,63,68,81,86,133,164,171,182,216,227,244,250,257,263,268,284,298,301,350,444,451,533,644,649,705,723,736,769,788,793,821,826,857,865,892,902,905,930,962,968,981,991,1020,1023,1048,1076,1101],"first":[29,134],"of":[30,138,159,262,461,574,591,648,659,673,676,722,790,859,864,904,923,942,970,979,1022,1047,1115],"its":[31,797],"kind,":[32],"as":[33,1014,1104,1119],"it":[34,282],"attempts":[35],"to":[36,119,147,170,364,394,466,643,783,853,891,900,938,1085,1097],"automatically":[37],"collect":[38],"publication":[39,109,432,710,762,775,784,882,955],"information":[40],"from":[41,62,163,189,681,741,755,773,780,818,936,1125],"and":[44,67,85,98,108,112,131,135,153,247,259,281,326,337,345,380,409,425,447,595,656,697,796,825,837,848,918,934,967,976],"track":[45],"whether":[46,768,878],"submitted":[47],"have":[49,404,608],"resulted":[50],"publication.":[53],"generated":[56],"this":[58,202,355,996],"study":[59,148],"was":[60,117,279,283,289,331,362,372,711,738,772,944,974,983,1063,1073],"retrieved":[61,214,225],"preprint":[65,239,264,378,535,600,724,737,781,795,823,847,861,965,1042],"server":[66],"Crossref":[69,228,756],"metadata":[70,95,116,212,223,655],"API":[71,218,229],"July":[73],"2024.":[74],"covers":[77],"two":[78,322,682,1001,1126],"time":[79,191],"periods:":[80],"pre-pandemic":[82],"period":[83,89,246,445,452],"(2016\u20132018)":[84],"COVID-19":[87],"pandemic":[88],"(2020\u20132022).":[90],"contains":[93,653],"detailed":[94],"about":[96],"their":[99,971],"versions,":[101],"including":[102],"titles,":[103,151],"authors,":[104,806],"abstracts,":[105,152],"institutions,":[106],"submission":[107,166,597,782,1038],"dates,":[110],"licenses,":[111],"subject":[113],"categories.":[114],"processed":[118,187],"facilitate":[120],"analysis,":[121],"for":[122,243,249,443,450,532,690,702,1018,1054],"example,":[123],"by":[124,311,318,628,688,1000,1107],"standardizing":[125],"date":[126,579,585,759,771],"formats,":[127],"normalizing":[128],"author":[129,154,327,359,426,562,568,576,753,850,1093],"names,":[130],"selecting":[132],"last":[136,260,546,558,570,587,596,669],"version":[137,278,288,408,541,547,553,559,565,582,588,601,665,670,1062],"each":[139],"preprint.":[140,650,660],"offers":[144],"diverse":[145],"opportunities":[146],"changes":[149],"composition":[155],"over":[156],"different":[157],"stages":[158,169],"preprint,":[161],"starting":[162],"initial":[165,258,285,540,552,564,581,594,664],"through":[167],"revision":[168],"final":[172,770],"version.":[174,571,799],"Main":[175,523],"file":[176,647,652,992],"provided:":[177],"PreprintToPaper.csv":[178],"\u2014":[179],"This":[180,639,874,909,1011,1071],"main":[183,893],"containing":[185],"all":[186],"both":[190,256,271,654,1108],"periods.":[192],"Keywords":[193],"BioRxiv,":[194],"Crossref,":[195],"preprints,":[196,460,1057],"Paper":[198],"A":[199],"paper":[200,222,745,752],"describing":[201],"currently":[205],"under":[206],"preparation.":[207],"Usage":[208],"Data":[209,253],"Collection":[210],"Preprint":[211,398,483,537,543,549,555,612,622,727,1037],"were":[213,224,241,265,273,295,303,309,316,347,389,464,839,998,1099,1117],"via":[215,226],"-":[219,539,569],"api.biorxiv.org/details/[server]/[interval]/[cursor]/[format]":[220],"Published":[221,485,744],"based":[230,422,928,946],"on":[231,392,423,614,929,947,950],"DOI.":[233],"api.crossref.org/works/[doi]":[234],"As":[235],"result,":[237],"48,300":[238],"records":[240,302],"obtained":[242],"2016\u20132018":[245],"152,869":[248],"2020\u20132022":[251],"period.":[252],"Processing":[254],"If":[255,275,291],"versions":[261,272,294,607,675,973],"available":[266,280,296],"during":[267,297],"relevant":[269,299],"period,":[270,300],"retained.":[274,290],"only":[276,292,338,379],"one":[277],"version,":[286],"that":[287,353,388,401],"non-initial":[293],"deleted.":[304],"Unlinked":[305],"identified":[310,317,701,1053],"checking":[312,319],"preprints.":[313,677],"against":[321],"criteria:":[323],"title":[324,424,538,544,746,824,920,1082],"similarity":[325,330,815,931,963],"similarity.":[328],"Title":[329],"measured":[332],"using":[333,1003],"matching":[335,907,1025],"score,":[336],"cases":[339,980,1111],"score":[342,361,816,834,845,875,922,926,982,1072,1084,1095,1114],"between":[343,593,792,820,835,846,932,964],"0.75":[344,836,937,943,1081],"1":[346],"considered.":[348],"For":[349],"19,090":[351,474,519],"rows":[352],"met":[354],"threshold,":[356],"an":[357,693,953,1015],"additional":[358,1016],"match":[360,862,921,1083,1094],"calculated":[363],"more":[365,805],"accurately":[366],"identify":[367],"unlinked":[368],"publications.":[369],"categorized":[373],"into":[374],"3":[375],"groups:":[376],"gray":[381,706,871,880,1055,1077],"zone.":[382],"See":[383],"explanations":[384],"below:":[385],"Published:":[386],"Preprints":[387,400,418,487],"already":[390],"linked":[391,413,465,716],"article.":[397,1050],"only:":[399],"did":[402],"not":[403,411,714],"any":[405],"corresponding":[406,575,866],"could":[410],"be":[412,1067],"bioRxiv.":[415,718,958],"Gray":[416,481,730],"zone:":[417],"potential":[420,841,1060,1087],"matches":[421,1106],"similarity,":[427],"but":[428,713,1065],"confirmed":[431,1103],"DOI":[433,531,1043,1046],"link.":[434],"After":[435],"data":[436],"processing,":[437],"total,":[439],"33,687":[440],"remained":[442],"2016\u20132018,":[446],"111,830":[448],"2020\u20132022.":[453],"Final":[454],"Dataset":[455,524],"includes":[458,911],"145,517":[459,522],"which:":[462],"90,614":[463,521],"\u201cPublished\u201d,":[469],"35,813":[470,520],"\"Preprint":[472],"Only\",":[473],"are":[475,988],"\"Gray":[476],"Zone\".":[477],"Summary":[478],"Statistics":[479],"Period":[480],"Zone":[482,731],"Only":[484,830],"Total":[486,518],"2016":[488],"359":[489],"1,019":[490],"3,343":[491],"4,721":[492],"2017":[493],"841":[494],"2,314":[495],"8,191":[496],"11,346":[497],"2018":[498],"1,121":[499],"4,115":[500],"12,943":[501],"18,179":[502],"2020":[503],"3,746":[504],"8,884":[505],"26,081":[506],"38,711":[507],"2021":[508],"5,292":[509],"8,987":[510],"22,539":[511],"36,818":[512],"2022":[513],"7,731":[514],"10,494":[515],"17,517":[516],"35,742":[517],"Column":[525,527,1031,1033],"Descriptions":[526,1032],"Name":[528,1034],"Description":[529,1035],"biorxiv_doi":[530,1040],"biorxiv_title_1st":[536],"biorxiv_title_last":[542],"\u2013":[545,551,557,563,580,586,663,668],"biorxiv_authors_1st":[548],"authors":[550,556,791,858],"biorxiv_authors_last":[554],"biorxiv_author_corresponding_1st":[560],"Corresponding":[561,567],"biorxiv_author_corresponding_last":[566],"biorxiv_author_corresponding_institution":[572],"Affiliation":[573],"biorxiv_submission_date_1st":[577],"Submission":[578,584],"biorxiv_submission_date_last":[583],"custom_biorxivVersion_dateDifference":[589],"Number":[590],"days":[592],"biorxiv_version_last":[598],"Last":[599],"number":[602,789],"(e.g.,":[603,616,630,817],"5":[604,606],"if":[605],"been":[609],"submitted)":[610],"biorxiv_type":[611],"type":[613],"confirmatory,":[617],"contradictory,":[618],"new":[619],"result)":[620],"biorxiv_license":[621],"license":[623],"biorxiv_category":[624],"Subject":[625],"category":[626],"assigned":[627],"bioinformatics,":[631],"genetics)":[632],"biorxiv_jatsxml":[633],"JATS":[634,645],"XML":[635,646],"structure":[636],"(if":[637],"available).":[638],"provides":[640],"link":[642,883,956],"full":[657,1009],"text":[658],"biorxiv_abstract_1st":[661],"Abstract":[662,667],"biorxiv_abstract_last":[666],"biorxiv_published_doi":[671],"DOIs":[672,685,699,1052],"values":[679,803,810],"come":[680],"sources:":[683],"(1)":[684],"directly":[686,715],"provided":[687,899],"official":[694,954],"link,":[696],"(2)":[698],"additionally":[700],"zone,":[707],"where":[708,735,1058],"found":[712,1064],"custom_status":[719,914],"Custom":[720],"classification":[721],"status:":[725],"Published,":[726],"Only,":[728],"or":[729,763,776],"crossref_journal_name":[732],"Journal":[733],"name":[734],"(retrieved":[740],"Crossref)":[742,749],"crossref_title":[743],"(listed":[747],"crossref_authors":[750],"Publish":[751],"list":[754],"crossref_publication_date":[757],"Publication":[758],"(either":[760],"online":[761,774],"issue":[764,777],"date)":[765],"crossref_publication_date_type":[766],"Indicates":[767],"custom_submission&publication_dateDiff":[778],"Days":[779],"author_count_diff":[785],"Difference":[786],"Positive":[800,802],"=":[801,808,915],"indicate":[804,811],"negative":[807,809],"fewer.":[812],"title_match_score":[813],"Text":[814],"SequenceMatcher)":[819],"article":[828],"title.":[829],"pairs":[831],"1.0":[838],"considered":[840],"matches.":[842,1088],"author_match_score":[843,1070],"Similarity":[844],"lists,":[851],"used":[852,1074],"assess":[854],"how":[855],"closely":[856],"those":[863],"paper,":[868],"particularly":[869],"zone":[872,881,1056,1078],"cases.":[873],"helps":[876],"validate":[877,1086],"correct.":[885],"Additional":[886],"Human-Annotated":[887],"Subset":[888],"In":[889,959],"addition":[890],"dataset,":[894],"human-annotated":[896],"subset":[897,910,997,1012,1079],"support":[901],"evaluation":[903],"automatic":[906,1024],"procedure.":[908,1026],"\"gray":[916],"zone\"":[917],"0.75.":[924,985],"titles":[933,966,969],"ranges":[935],"1.":[939],"threshold":[941],"chosen":[945],"checks":[948],"performed":[949],"these":[960],"checks,":[961],"calculated,":[975],"91%":[978],"\u2265":[984],"results":[987],"presented":[989],"PreprintToPaper_GrayZone.csv.":[993],"Matches":[994],"reviewed":[999],"annotators":[1002,1127],"abstracts":[1004],"and,":[1005],"some":[1007],"cases,":[1008],"texts.":[1010],"serves":[1013],"assessing":[1019],"accuracy":[1021],"\u00b7":[1027],"File":[1028],"name:":[1029],"PreprintToPaper_GrayZone.csv":[1030],"year":[1036,1039],"suspected_published_doi":[1044],"Suspected":[1045],"Contains":[1051],"cannot":[1066],"fully":[1068],"confirmed.":[1069],"Cases":[1089],"high":[1092],"(close":[1096],"1)":[1098],"majority":[1102],"true":[1105],"annotators,":[1109],"0":[1116],"evaluated":[1118],"non-matched.":[1120],"annotator1":[1121],"/":[1122],"annotator2":[1123],"Labels":[1124],"(True,":[1128],"False,":[1129],"NA)":[1130]},"counts_by_year":[],"updated_date":"2026-06-19T17:40:00.097472","created_date":"2025-10-10T00:00:00"}
