{"id":"https://openalex.org/W4393610098","doi":"https://doi.org/10.5281/zenodo.10058142","title":"Dataset of the paper: \"How do Hugging Face Models Document Datasets, Bias, and Licenses? An Empirical Study\"","display_name":"Dataset of the paper: \"How do Hugging Face Models Document Datasets, Bias, and Licenses? An Empirical Study\"","publication_year":2023,"publication_date":"2023-10-31","ids":{"openalex":"https://openalex.org/W4393610098","doi":"https://doi.org/10.5281/zenodo.10058142"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.10058142","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10058142","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.10058142","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A9999999999","display_name":"Pepe, Federica","orcid":null},"institutions":[{"id":"https://openalex.org/I16337185","display_name":"University of Sannio","ror":"https://ror.org/04vc81p87","country_code":"IT","type":"education","lineage":["https://openalex.org/I16337185"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Pepe, Federica","raw_affiliation_strings":["University of Sannio"],"affiliations":[{"raw_affiliation_string":"University of Sannio","institution_ids":["https://openalex.org/I16337185"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nardone, Vittoria","orcid":"https://orcid.org/0000-0001-7888-6620"},"institutions":[{"id":"https://openalex.org/I129627893","display_name":"University of Molise","ror":"https://ror.org/04z08z627","country_code":"IT","type":"education","lineage":["https://openalex.org/I129627893"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Nardone, Vittoria","raw_affiliation_strings":["University of Molise"],"affiliations":[{"raw_affiliation_string":"University of Molise","institution_ids":["https://openalex.org/I129627893"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mastropaolo, Antonio","orcid":"https://orcid.org/0000-0002-7965-7712"},"institutions":[{"id":"https://openalex.org/I57201433","display_name":"Universit\u00e0 della Svizzera italiana","ror":"https://ror.org/03c4atk17","country_code":"CH","type":"education","lineage":["https://openalex.org/I57201433"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Mastropaolo, Antonio","raw_affiliation_strings":["Universit\u00e0 della Svizzera italiana"],"affiliations":[{"raw_affiliation_string":"Universit\u00e0 della Svizzera italiana","institution_ids":["https://openalex.org/I57201433"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Canfora, Gerardo","orcid":"https://orcid.org/0000-0003-0049-1279"},"institutions":[{"id":"https://openalex.org/I16337185","display_name":"University of Sannio","ror":"https://ror.org/04vc81p87","country_code":"IT","type":"education","lineage":["https://openalex.org/I16337185"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Canfora, Gerardo","raw_affiliation_strings":["Universit\u00e0 degli Studi del Sannio"],"affiliations":[{"raw_affiliation_string":"Universit\u00e0 degli Studi del Sannio","institution_ids":["https://openalex.org/I16337185"]}]},{"author_position":"middle","author":{"id":null,"display_name":"BAVOTA, Gabriele","orcid":"https://orcid.org/0000-0002-2216-3148"},"institutions":[{"id":"https://openalex.org/I57201433","display_name":"Universit\u00e0 della Svizzera italiana","ror":"https://ror.org/03c4atk17","country_code":"CH","type":"education","lineage":["https://openalex.org/I57201433"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"BAVOTA, Gabriele","raw_affiliation_strings":["Universit\u00e0 della Svizzera italiana"],"affiliations":[{"raw_affiliation_string":"Universit\u00e0 della Svizzera italiana","institution_ids":["https://openalex.org/I57201433"]}]},{"author_position":"last","author":{"id":null,"display_name":"Di Penta, Massimiliano","orcid":"https://orcid.org/0000-0002-0340-9747"},"institutions":[{"id":"https://openalex.org/I16337185","display_name":"University of Sannio","ror":"https://ror.org/04vc81p87","country_code":"IT","type":"education","lineage":["https://openalex.org/I16337185"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Di Penta, Massimiliano","raw_affiliation_strings":["University of Sannio"],"affiliations":[{"raw_affiliation_string":"University of Sannio","institution_ids":["https://openalex.org/I16337185"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A9999999999"],"corresponding_institution_ids":["https://openalex.org/I16337185"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9789999723434448,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.9789999723434448,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.47997188568115234},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4708040952682495},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.38196560740470886},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3226008117198944},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.10361123085021973},{"id":"https://openalex.org/keywords/social-science","display_name":"Social science","score":0.06869682669639587}],"concepts":[{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.47997188568115234},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4708040952682495},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38196560740470886},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3226008117198944},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.10361123085021973},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.06869682669639587}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.10058142","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10058142","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.5281/zenodo.10058142","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.10058142","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"This":[0],"replication":[1,194],"package":[2,195],"contains":[3,102],"datasets":[4,148,161],"and":[5,19,38,40,68,136,253,349,389,408,424,436,449],"scripts":[6,64,440,443],"related":[7,325],"to":[8,32,66,166,177,200,304,320,326,379,445],"the":[9,34,41,51,63,78,97,127,129,131,137,157,171,179,185,190,193,198,211,217,235,242,249,255,260,263,282,298,306,322,335,341,346,351,386,403,442,454],"paper:":[10],"\"*How":[11],"do":[12],"Hugging":[13,447],"Face":[14,448],"Models":[15],"Document":[16],"Datasets,":[17],"Bias,":[18],"Licenses?":[20],"An":[21],"Empirical":[22],"Study*\"":[23],"##":[24,81,141,276,354,439],"Root":[25],"directory":[26,60,183],"-":[27,45,58,83,90,100,107,117,143,149,162,214,266,271,278,294,301,317,329,356,365,373,391,415,428],"`statistics.r`:":[28],"R":[29],"script":[30,79,165,213],"used":[31,65,112,123,155,444],"compute":[33,305,321],"correlation":[35],"between":[36,420],"usage":[37,103,417],"downloads,":[39],"RQ1/RQ2":[42],"inter-rater":[43,307,323],"agreements":[44],"`modelsInfo.zip`:":[46],"zip":[47],"file":[48,76,206,303,319,338],"containing":[49,61],"all":[50,62],"downloaded":[52],"model":[53,106,122,168,236,243,250,256,292,314,333,404],"cards":[54],"(in":[55],"JSON":[56],"format)":[57],"`script`:":[59],"collect":[67],"process":[69],"data.":[70],"For":[71],"further":[72],"details,":[73],"see":[74],"README":[75,456],"inside":[77],"directory.":[80],"Dataset":[82],"`Dataset/Dataset_HF-models-list.csv`:":[84],"list":[85,92,145,229],"of":[86,93,110,133,139,146,153,160,173,192,219,230,259,269,274,284,286,289,297,309,360,363,368,400,413],"HF":[87,147,239],"models":[88,111,154],"analyzed":[89,209],"`Dataset/Dataset_github-prj-list.txt`:":[91],"GitHub":[94,115,125],"projects":[95],"using":[96],"*transformers*":[98],"library":[99],"`Dataset/Dataset_github-Prj_model-Used.csv`:":[101],"pairs:":[104],"project,":[105],"`Dataset/Dataset_prj-num-models-reused.csv`:":[108],"number":[109,132,138,283],"by":[113,124,210,291],"each":[114,121,226,332,376,394],"project":[116,377],"`Dataset/Dataset_model-download_num-prj_correlation.csv`":[118],"contains,":[119],"for":[120,156,170,225,262,331,375,393],"projects:":[126],"name,":[128],"task,":[130],"reusing":[134],"projects,":[135],"downloads":[140],"RQ1":[142],"`RQ1/RQ1_dataset-list.txt`:":[144],"`RQ1/RQ1_datasetSample.csv`:":[150],"sample":[151,261,362],"set":[152],"manual":[158,264,299,358],"analysis":[159,265],"`RQ1/RQ1_analyzeDatasetTags.py`:":[163],"Python":[164],"analyze":[167],"tags":[169],"presence":[172],"datasets.":[174],"it":[175,384],"requires":[176],"unzip":[178],"`modelsInfo.zip`":[180],"in":[181,204,334,453],"a":[182,205,228,313,361,406],"with":[184,370,432],"same":[186],"name":[187,390],"(`modelsInfo`)":[188],"at":[189],"root":[191],"folder.":[196],"Produces":[197],"output":[199,218,268,273],"stdout.":[201],"To":[202],"redirect":[203],"fo":[207],"be":[208],"`RQ2/countDataset.py`":[212,275],"`RQ1/RQ1_countDataset.py`:":[215],"given":[216],"`RQ2/analyzeDatasetTags.py`":[220,270],"(passed":[221],"as":[222],"argument)":[223],"produces,":[224],"model,":[227,395],"Booleans":[231],"indicating":[232],"whether":[233,310,402],"(i)":[234,340],"only":[237,244],"declares":[238,245,251],"datasets,":[240,247],"(ii)":[241,345],"external":[246],"(iii)":[248,350],"both,":[252],"(iv)":[254],"is":[257],"part":[258],"`RQ1/RQ1_datasetTags.csv`:":[267],"`RQ1/RQ1_dataset_usage_count.csv`:":[272],"RQ2":[277],"`RQ2/tableBias.pdf`:":[279],"table":[280,419],"detailing":[281],"occurrences":[285],"different":[287,371],"types":[288],"bias":[290,327,342],"Task":[293],"`RQ2/RQ2_bias_classification_sheet.csv`:":[295],"results":[296],"labeling":[300],"`RQ2/RQ2_isBiased.csv`:":[302],"agreement":[308,324],"or":[311],"not":[312],"documents":[315],"Bias":[316],"`RQ2/RQ2_biasAgrLabels.csv`:":[318],"categories":[328],"`RQ2/RQ2_final_bias_categories_with_levels.csv`:":[330],"sample,":[336],"this":[337],"lists":[339,367],"leaf":[343],"category,":[344,348],"first-level":[347],"intermediate":[352],"category":[353],"RQ3":[355],"`RQ3/RQ3_LicenseValidation.csv`:":[357],"validation":[359],"licenses":[364,369,422,426,435],"`RQ3/RQ3_{NETWORK-RESTRICTIVE|RESTRICTIVE|WEAK-RESTRICTIVE|PERMISSIVE}-license-list.txt`:":[366],"permissiveness":[372,437],"`RQ3/RQ3_prjs_license.csv`:":[374],"linked":[378],"models,":[380],"among":[381,397],"other":[382,398],"fields":[383],"indicates":[385,396],"license":[387,414],"tag":[388],"`RQ3/RQ3_models_license.csv`:":[392],"pieces":[399],"info,":[401],"has":[405],"license,":[407],"if":[409],"yes":[410],"what":[411],"kind":[412],"`RQ3/RQ3_model-prj-license_contingency_table.csv`:":[416],"contingency":[418],"projects'":[421],"(columns)":[423],"models'":[425],"(rows)":[427],"`RQ3/RQ3_models_prjs_licenses_with_type.csv`:":[429],"pairs":[430],"project-model,":[431],"their":[433],"respective":[434],"level":[438],"Contains":[441],"mine":[446],"GitHub.":[450],"Details":[451],"are":[452],"enclosed":[455]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2024-04-03T00:00:00"}
