{"id":"https://openalex.org/W6929375910","doi":"https://doi.org/10.48550/arxiv.2505.17841","title":"TEDI: Trustworthy and Ethical Dataset Indicators to Analyze and Compare Dataset Documentation","display_name":"TEDI: Trustworthy and Ethical Dataset Indicators to Analyze and Compare Dataset Documentation","publication_year":2025,"publication_date":"2025-05-23","ids":{"openalex":"https://openalex.org/W6929375910","doi":"https://doi.org/10.48550/arxiv.2505.17841"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2505.17841","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2505.17841","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2505.17841","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Hutiri, Wiebke","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hutiri, Wiebke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Cimpoi, Mircea","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cimpoi, Mircea","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Scheuerman, Morgan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Scheuerman, Morgan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Matthews, Victoria","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matthews, Victoria","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Xiang, Alice","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiang, Alice","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.8149999976158142,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.8149999976158142,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.06800000369548798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.050700001418590546,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.7910000085830688},{"id":"https://openalex.org/keywords/transparency","display_name":"Transparency (behavior)","score":0.7452999949455261},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.6632000207901001},{"id":"https://openalex.org/keywords/enabling","display_name":"Enabling","score":0.527999997138977},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.51419997215271},{"id":"https://openalex.org/keywords/data-collection","display_name":"Data collection","score":0.4814000129699707},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.43639999628067017}],"concepts":[{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.7910000085830688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7520999908447266},{"id":"https://openalex.org/C2780233690","wikidata":"https://www.wikidata.org/wiki/Q535347","display_name":"Transparency (behavior)","level":2,"score":0.7452999949455261},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.6632000207901001},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5759000182151794},{"id":"https://openalex.org/C22607594","wikidata":"https://www.wikidata.org/wiki/Q5375150","display_name":"Enabling","level":2,"score":0.527999997138977},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.51419997215271},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.4814000129699707},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.43639999628067017},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.3628999888896942},{"id":"https://openalex.org/C197947376","wikidata":"https://www.wikidata.org/wiki/Q5155608","display_name":"Comparability","level":2,"score":0.3407999873161316},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.335099995136261},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32089999318122864},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3158999979496002},{"id":"https://openalex.org/C197352329","wikidata":"https://www.wikidata.org/wiki/Q1093434","display_name":"Citizen science","level":2,"score":0.31119999289512634},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.27140000462532043},{"id":"https://openalex.org/C2986663376","wikidata":"https://www.wikidata.org/wiki/Q9465","display_name":"Ethical issues","level":2,"score":0.2578999996185303}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2505.17841","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2505.17841","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2505.17841","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2505.17841","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5813259482383728}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Dataset":[0,42],"transparency":[1,203],"is":[2,187],"a":[3,125],"key":[4],"enabler":[5],"of":[6,21,51,65,162,183,218],"responsible":[7],"AI,":[8],"but":[9,186],"insights":[10,109,198],"into":[11,110],"multimodal":[12,66,92],"dataset":[13,52,81,202,222],"attributes":[14,64,131],"that":[15,45,59,94,113,123],"impact":[16],"trustworthy":[17,61,115,205],"and":[18,26,40,62,68,88,104,116,132,138,147,167,196,206,209],"ethical":[19,63,117,149,184,207],"aspects":[20],"AI":[22],"applications":[23],"remain":[24],"scarce":[25],"are":[27,74,151],"difficult":[28],"to":[29,76,107,135,144,174,200],"compare":[30],"across":[31,119],"datasets.":[32,120],"To":[33],"address":[34],"this":[35],"challenge,":[36],"we":[37,85],"introduce":[38],"Trustworthy":[39],"Ethical":[41],"Indicators":[43],"(TEDI)":[44],"facilitate":[46],"the":[47,111,156,181,189,211,215],"systematic,":[48],"empirical":[49,197],"analysis":[50],"documentation.":[53,82],"TEDI":[54],"encompasses":[55],"143":[56],"fine-grained":[57],"indicators":[58,73,150],"characterize":[60],"datasets":[67,93,128,163],"their":[69],"collection":[70,158,169,192],"processes.":[71],"The":[72,142],"framed":[75],"extract":[77],"verifiable":[78],"information":[79,220],"from":[80,221],"Using":[83],"TEDI,":[84],"manually":[86],"annotated":[87,100],"analyzed":[89],"over":[90],"100":[91],"include":[95],"human":[96],"voices.":[97],"We":[98,121],"further":[99],"data":[101,157],"sourcing,":[102],"size,":[103],"modality":[105],"details":[106],"gain":[108],"factors":[112],"shape":[114],"dimensions":[118,208],"find":[122],"only":[124,190],"select":[126],"few":[127],"have":[129],"documented":[130],"practices":[133],"pertaining":[134],"consent,":[136],"privacy,":[137],"harmful":[139],"content":[140],"indicators.":[141],"extent":[143],"which":[145],"these":[146],"other":[148],"addressed":[152],"varies":[153],"based":[154],"on":[155],"method,":[159],"with":[160],"documentation":[161,223],"collected":[164],"via":[165],"crowdsourced":[166],"direct":[168],"approaches":[170],"being":[171],"more":[172],"likely":[173],"mention":[175],"them.":[176],"Scraping":[177],"dominates":[178],"scale":[179],"at":[180],"cost":[182],"indicators,":[185],"not":[188],"viable":[191],"method.":[193],"Our":[194],"approach":[195],"contribute":[199],"increasing":[201],"along":[204],"pave":[210],"way":[212],"for":[213],"automating":[214],"tedious":[216],"task":[217],"extracting":[219],"in":[224],"future.":[225]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
