{"id":"https://openalex.org/W6893284812","doi":"https://doi.org/10.5281/zenodo.15187257","title":"Replication package for the paper: \"Datasets, Bias, Licenses, and Terms of Use: A Large and Longitudinal Study on the Documentation of Hugging Face Machine Learning Models\"","display_name":"Replication package for the paper: \"Datasets, Bias, Licenses, and Terms of Use: A Large and Longitudinal Study on the Documentation of Hugging Face Machine Learning Models\"","publication_year":2025,"publication_date":"2025-04-10","ids":{"openalex":"https://openalex.org/W6893284812","doi":"https://doi.org/10.5281/zenodo.15187257"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.15187257","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15187257","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.15187257","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Pepe, Federica","orcid":"https://orcid.org/0009-0008-3038-3977"},"institutions":[{"id":"https://openalex.org/I16337185","display_name":"University of Sannio","ror":"https://ror.org/04vc81p87","country_code":"IT","type":"education","lineage":["https://openalex.org/I16337185"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Pepe, Federica","raw_affiliation_strings":["University of Sannio"],"raw_orcid":"https://orcid.org/0009-0008-3038-3977","affiliations":[{"raw_affiliation_string":"University of Sannio","institution_ids":["https://openalex.org/I16337185"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nardone, Vittoria","orcid":"https://orcid.org/0000-0001-7888-6620"},"institutions":[{"id":"https://openalex.org/I129627893","display_name":"University of Molise","ror":"https://ror.org/04z08z627","country_code":"IT","type":"education","lineage":["https://openalex.org/I129627893"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Nardone, Vittoria","raw_affiliation_strings":["University of Molise"],"raw_orcid":"https://orcid.org/0000-0001-7888-6620","affiliations":[{"raw_affiliation_string":"University of Molise","institution_ids":["https://openalex.org/I129627893"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mastropaolo, Antonio","orcid":"https://orcid.org/0000-0002-7965-7712"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mastropaolo, Antonio","raw_affiliation_strings":["William & Mary"],"raw_orcid":"https://orcid.org/0000-0002-7965-7712","affiliations":[{"raw_affiliation_string":"William & Mary","institution_ids":["https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Canfora, Gerardo","orcid":"https://orcid.org/0000-0003-0049-1279"},"institutions":[{"id":"https://openalex.org/I16337185","display_name":"University of Sannio","ror":"https://ror.org/04vc81p87","country_code":"IT","type":"education","lineage":["https://openalex.org/I16337185"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Canfora, Gerardo","raw_affiliation_strings":["University of Sannio"],"raw_orcid":"https://orcid.org/0000-0003-0049-1279","affiliations":[{"raw_affiliation_string":"University of Sannio","institution_ids":["https://openalex.org/I16337185"]}]},{"author_position":"middle","author":{"id":null,"display_name":"BAVOTA, Gabriele","orcid":"https://orcid.org/0000-0002-2216-3148"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"BAVOTA, Gabriele","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0000-0002-2216-3148","affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Di Penta, Massimiliano","orcid":"https://orcid.org/0000-0002-0340-9747"},"institutions":[{"id":"https://openalex.org/I16337185","display_name":"University of Sannio","ror":"https://ror.org/04vc81p87","country_code":"IT","type":"education","lineage":["https://openalex.org/I16337185"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Di Penta, Massimiliano","raw_affiliation_strings":["University of Sannio"],"raw_orcid":"https://orcid.org/0000-0002-0340-9747","affiliations":[{"raw_affiliation_string":"University of Sannio","institution_ids":["https://openalex.org/I16337185"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":null,"topics":[],"keywords":[{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.6952000260353088},{"id":"https://openalex.org/keywords/directory","display_name":"Directory","score":0.6377000212669373},{"id":"https://openalex.org/keywords/snapshot","display_name":"Snapshot (computer storage)","score":0.599399983882904},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.5205000042915344},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.4180000126361847},{"id":"https://openalex.org/keywords/replication","display_name":"Replication (statistics)","score":0.35019999742507935},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.3192000091075897}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8187000155448914},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.6952000260353088},{"id":"https://openalex.org/C2777683733","wikidata":"https://www.wikidata.org/wiki/Q201456","display_name":"Directory","level":2,"score":0.6377000212669373},{"id":"https://openalex.org/C55282118","wikidata":"https://www.wikidata.org/wiki/Q252683","display_name":"Snapshot (computer storage)","level":2,"score":0.599399983882904},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.5205000042915344},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.508899986743927},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4521999955177307},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.4180000126361847},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.37549999356269836},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3617999851703644},{"id":"https://openalex.org/C12590798","wikidata":"https://www.wikidata.org/wiki/Q3933199","display_name":"Replication (statistics)","level":2,"score":0.35019999742507935},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3463999927043915},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.3192000091075897},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3102000057697296},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C88006597","wikidata":"https://www.wikidata.org/wiki/Q690117","display_name":"Disk formatting","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C2984074130","wikidata":"https://www.wikidata.org/wiki/Q73539779","display_name":"R package","level":2,"score":0.2937000095844269},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2703000009059906},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.2549999952316284},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.25360000133514404},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.15187257","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15187257","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.5281/zenodo.15187257","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15187257","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0,30],"replication":[1,31],"package":[2,32],"contains":[3,33,158,167,177,190,200,209],"datasets":[4,222,243,255],"related":[5,66,320,340,362,384,405,433,456,471],"to":[6,56,67,195,321,341,363,385,406,434,457,472],"the":[7,22,34,39,43,49,76,82,102,116,126,135,143,153,178,182,191,196,201,210,228,237,251,260,271,285,300,312,332,453,462,468,477],"paper:":[8],"\"Datasets,":[9],"Bias,":[10,93],"Licenses,":[11],"and":[12,18,60,73,94,184,318,323,338,343,358,365,380,387,401,408,429,436],"Terms":[13],"of":[14,24,42,69,111,122,131,139,149,206,212,220,225,234,247,254,259,274,288,303,308,328,370,391,413,441,452,459,467,474],"Use:":[15],"A":[16],"Large":[17],"Longitudinal":[19],"Study":[20],"on":[21],"Documentation":[23],"Hugging":[25,88],"Face":[26,89],"Machine":[27],"Learning":[28],"Models\"":[29],"new":[35],"data":[36,47,74],"used":[37,249],"for":[38,48,163,172,187,250,346,415,443,461,476],"journal":[40],"version":[41],"manuscript,":[44],"featuring:":[45],"All":[46],"second":[50,185,478],"snapshot":[51,78,186,464,479],"(downloaded":[52],"in":[53,368,389,411,439],"September":[54],"2024)":[55],"answer":[57],"RQ1,":[58],"RQ2,":[59],"RQ3":[61,263],"Data":[62],"from":[63,75,81],"both":[64],"snapshots":[65],"terms":[68,369,390,412,440,458,473],"use":[70,460,475],"(RQ4)":[71],"Scripts":[72],"first":[77,183,463],"(April":[79],"2023)":[80],"ICPC":[83],"2024":[84],"paper":[85],"\"How":[86],"do":[87],"Models":[90],"Document":[91],"Datasets,":[92],"Licenses?":[95],"An":[96],"Empirical":[97],"Study\"":[98],"are":[99],"available":[100],"at":[101],"following":[103,117],"link:":[104],"https://zenodo.org/records/10058142":[105],"Root":[106],"directory":[107],"Dataset":[108],"Dataset/Dataset_HF_model_list.csv:":[109],"list":[110,121,130,138,148,219,224,233,307,327],"HF":[112,221],"models":[113,179,226,235,248],"analyzed,":[114],"with":[115,203,215,240,270,284,299,314,334,417,445],"information:":[118],"id,downloads,likes,tags,pipeline_tag,pipeline_category,License,license_model_permissivity":[119],"Dataset/Dataset_GitHub_prj_list_Transformers.txt:":[120],"GitHub":[123,132,140,150],"projects":[124,133,141,151,202,214,309,329,416,444],"using":[125,134,142,152],"transformers":[127,145,164,277],"library":[128,146,156,165,174,345,367,410,438],"Dataset/Dataset_GitHub_prj_list_Diffusers.txt:":[129],"diffuserslibrary":[136],"Dataset/Dataset_GitHub_prj_frompretrained_Transformers.txt:":[137],"\"from_pretrained\"":[144,154],"Dataset/Dataset_GitHub_prj_frompretrained_Diffusers.txt:":[147],"diffusers":[155,173,292],"Dataset/Dataset_GitHub_prj_model_used_Transformers.csv:":[157],"usage":[159,168,351,373,394,422],"pairs:":[160,169],"project,":[161,170],"model":[162,171,192,265],"Dataset/Dataset_GitHub_prj_model_used_Diffusers.csv:":[166],"Dataset/Dataset_IntersectedModels.csv":[175],":":[176,232,291],"shared":[180],"between":[181,354,376,397,425],"category":[188],"Dataset/modelsReadme:":[189],"cards":[193],"belonging":[194],"sample":[197,245],"size":[198],"Dataset/projects_with_5_or_more_stars.csv:":[199],"numStars":[204,216],"major":[205],"5":[207,349,420,448],"Dataset/projects_stars_summary.csv:":[208],"number":[211,273,287,302],"total":[213,306,326],"RQ1":[217],"RQ1/RQ1_dataset_list_HF.txt:":[218],"RQ1/RQ1_datasetTags.txt:":[223],"declaring":[227,236],"dataset":[229,238],"tag":[230,239],"RQ1/RQ1_modelDataset.csv":[231],"their":[241,315,335],"respective":[242,272,286,301,316,336],"RQ1/RQ1_datasetSample.csv:":[244],"set":[246],"manual":[252,261,454,469],"analysis":[253],"RQ2":[256],"RQ2/RQ2_bias_classification_sheet.csv:":[257],"results":[258,451,466],"labeling":[262,455,470],"RQ3/RQ3_License_Models.csv:":[264],"license":[266,279,294],"list,categorized":[267],"by":[268,282,297],"permissiveness,":[269,283,298],"occurrences":[275,289,304],"RQ3/RQ3_License_prjTransformers.csv:":[276],"project":[278,293],"list,":[280,295],"categorized":[281,296],"RQ3/RQ3_License_prjDiffusers.csv":[290],"RQ3/RQ3_prj_model_license_permissivity_Transformers_Diffusers.csv:":[305],"that":[310,330],"reuse":[311,331],"models,":[313,333],"licenses":[317,337,356,360,378,382,392,399,403,427,431,442],"permissiveness":[319,339,371,414],"Transformers":[322,342,364,386,407,435],"Diffuserslibrary":[324,388],"RQ3/RQ3_prj_model_license_permissivity_Transformers_Diffusers_Starmajor5.csv:":[325],"Diffusers":[344,366,409,437],"numStar":[347,418,446],">":[348,419,447],"RQ3/RQ3_Contingency_Matrix_permissivity_Transformers_Diffusers.csv:":[350],"contingency":[352,374,395,423],"table":[353,375,396,424],"projects'":[355,377,398,426],"(columns)":[357,379,400,428],"models'":[359,381,402,430],"(rows)":[361,383,404,432],"RQ3/RQ3_Contingency_Matrix_licenses_Transformers_Diffusers.csv:":[372],"RQ3/RQ3_Contingency_Matrix_permissivity_Transformers_Diffusers_Starmajor5.csv:":[393],"RQ3/RQ3_Contingency_Matrix_licenses_Transformers_Diffusers_Starmajor5.csv:":[421],"RQ4":[449],"RQ4/RQ4_Terms_of_Use_Snapshot1.csv:":[450],"RQ4/RQ4_Terms_of_Use_Snapshot2.csv:":[465]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
