{"id":"https://openalex.org/W2064958995","doi":"https://doi.org/10.1145/1851476.1851559","title":"Towards long term data quality in a large scale biometrics experiment","display_name":"Towards long term data quality in a large scale biometrics experiment","publication_year":2010,"publication_date":"2010-06-21","ids":{"openalex":"https://openalex.org/W2064958995","doi":"https://doi.org/10.1145/1851476.1851559","mag":"2064958995"},"language":"en","primary_location":{"id":"doi:10.1145/1851476.1851559","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1851476.1851559","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102970095","display_name":"Hoang Bui","orcid":"https://orcid.org/0000-0003-2320-667X"},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hoang Bui","raw_affiliation_strings":["University of Notre, Dame"],"affiliations":[{"raw_affiliation_string":"University of Notre, Dame","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110409445","display_name":"Diane Wright","orcid":null},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Diane Wright","raw_affiliation_strings":["University of Notre, Dame"],"affiliations":[{"raw_affiliation_string":"University of Notre, Dame","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034072328","display_name":"Clarence Helm","orcid":null},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Clarence Helm","raw_affiliation_strings":["University of Notre, Dame"],"affiliations":[{"raw_affiliation_string":"University of Notre, Dame","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062731998","display_name":"Rachel Witty","orcid":null},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rachel Witty","raw_affiliation_strings":["University of Notre, Dame"],"affiliations":[{"raw_affiliation_string":"University of Notre, Dame","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039987576","display_name":"Patrick J. Flynn","orcid":"https://orcid.org/0000-0002-5446-114X"},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Patrick Flynn","raw_affiliation_strings":["University of Notre, Dame"],"affiliations":[{"raw_affiliation_string":"University of Notre, Dame","institution_ids":["https://openalex.org/I107639228"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007301255","display_name":"Douglas Thain","orcid":"https://orcid.org/0000-0001-5218-1956"},"institutions":[{"id":"https://openalex.org/I107639228","display_name":"University of Notre Dame","ror":"https://ror.org/00mkhxb43","country_code":"US","type":"education","lineage":["https://openalex.org/I107639228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Douglas Thain","raw_affiliation_strings":["University of Notre, Dame"],"affiliations":[{"raw_affiliation_string":"University of Notre, Dame","institution_ids":["https://openalex.org/I107639228"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102970095"],"corresponding_institution_ids":["https://openalex.org/I107639228"],"apc_list":null,"apc_paid":null,"fwci":1.2091,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.82379501,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"565","last_page":"572"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11800","display_name":"User Authentication and Security Systems","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9641000032424927,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/terabyte","display_name":"Terabyte","score":0.8282354474067688},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8280791640281677},{"id":"https://openalex.org/keywords/biometrics","display_name":"Biometrics","score":0.8031936883926392},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.7644069194793701},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7186266183853149},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5544650554656982},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5479001998901367},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5274091958999634},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5024409294128418},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.44110092520713806},{"id":"https://openalex.org/keywords/skew","display_name":"Skew","score":0.4408615827560425},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4264277219772339},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4255889058113098},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.36849671602249146},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20477339625358582},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1297840178012848},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09414035081863403}],"concepts":[{"id":"https://openalex.org/C199683683","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Terabyte","level":2,"score":0.8282354474067688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8280791640281677},{"id":"https://openalex.org/C184297639","wikidata":"https://www.wikidata.org/wiki/Q177765","display_name":"Biometrics","level":2,"score":0.8031936883926392},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.7644069194793701},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7186266183853149},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5544650554656982},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5479001998901367},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5274091958999634},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5024409294128418},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.44110092520713806},{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.4408615827560425},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4264277219772339},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4255889058113098},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.36849671602249146},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20477339625358582},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1297840178012848},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09414035081863403},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1851476.1851559","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1851476.1851559","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.361.6234","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.361.6234","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cse.nd.edu/~ccl/research/pubs/bxgrid-mdqcs10.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5299999713897705,"display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G3585498092","display_name":null,"funder_award_id":"CCF-06-21434CNS-06-43229CNS-01-30839","funder_id":"https://openalex.org/F4320337388","funder_display_name":"Division of Computer and Network Systems"},{"id":"https://openalex.org/G5588982880","display_name":null,"funder_award_id":"CCF-06-21434CNS-06-43229CNS-01-30839","funder_id":"https://openalex.org/F4320337387","funder_display_name":"Division of Computing and Communication Foundations"}],"funders":[{"id":"https://openalex.org/F4320337387","display_name":"Division of Computing and Communication Foundations","ror":"https://ror.org/01mng8331"},{"id":"https://openalex.org/F4320337388","display_name":"Division of Computer and Network Systems","ror":"https://ror.org/02rdzmk74"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1549518189","https://openalex.org/W1557594219","https://openalex.org/W1974821667","https://openalex.org/W1989702938","https://openalex.org/W2115916845","https://openalex.org/W2125165932","https://openalex.org/W2130651654","https://openalex.org/W2140630145","https://openalex.org/W2150511069","https://openalex.org/W2159679741","https://openalex.org/W2161234420","https://openalex.org/W2183844215","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2066858118","https://openalex.org/W3011699130","https://openalex.org/W2134017072","https://openalex.org/W3138818610","https://openalex.org/W3081133439","https://openalex.org/W4386246791","https://openalex.org/W2945537679","https://openalex.org/W3211701140","https://openalex.org/W2952280724","https://openalex.org/W2133103607"],"abstract_inverted_index":{"Quality":[0],"of":[1,18,76,84,123],"data":[2,28,50,103,106],"plays":[3],"a":[4,31,39],"very":[5],"important":[6],"role":[7],"in":[8,24],"any":[9],"scientific":[10],"research.":[11,53,86],"In":[12],"this":[13],"paper":[14],"we":[15,22,91],"present":[16],"some":[17],"the":[19,67,73,82,98,119],"challenges":[20],"that":[21],"face":[23],"managing":[25],"and":[26,48,78,96,108,115,121],"maintaining":[27],"quality":[29,75,120],"for":[30,51],"terabyte":[32],"scale":[33],"biometrics":[34,52,85],"repository.":[35],"We":[36,87],"have":[37,92],"developed":[38],"step":[40,42],"by":[41],"model":[43],"to":[44,94,117],"capture,":[45],"ingest,":[46],"validate,":[47],"prepare":[49],"During":[54],"these":[55],"processes,":[56],"there":[57],"are":[58,112],"many":[59],"hidden":[60],"errors":[61,70],"which":[62],"can":[63,71,80],"be":[64],"introduced":[65],"into":[66],"data.":[68,125],"Those":[69],"affect":[72],"overall":[74],"data,":[77],"thus":[79],"skew":[81],"results":[83],"discuss":[88],"necessary":[89,114],"steps":[90],"taken":[93],"reduce":[95],"eliminate":[97],"errors.":[99],"Steps":[100],"such":[101],"as":[102],"replication,":[104],"automated":[105],"validation,":[107],"logging":[109],"metadata":[110],"changes":[111],"both":[113],"crucial":[116],"improve":[118],"reliability":[122],"our":[124]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
