{"id":"https://openalex.org/W2035583517","doi":"https://doi.org/10.1109/bigdata.2013.6691678","title":"A case study on entity Resolution for Distant Processing of big Humanities data","display_name":"A case study on entity Resolution for Distant Processing of big Humanities data","publication_year":2013,"publication_date":"2013-10-01","ids":{"openalex":"https://openalex.org/W2035583517","doi":"https://doi.org/10.1109/bigdata.2013.6691678","mag":"2035583517"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2013.6691678","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2013.6691678","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE International Conference on Big Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101883722","display_name":"Weijia Xu","orcid":"https://orcid.org/0000-0002-5134-6381"},"institutions":[{"id":"https://openalex.org/I4210143490","display_name":"Computing Center","ror":"https://ror.org/0557kgc34","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210143490","https://openalex.org/I4210148470"]},{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["RU","US"],"is_corresponding":true,"raw_author_name":"Weijia Xu","raw_affiliation_strings":["Advanced Computing Center","Texas Adv. Comput. Center, Univ. of Texas at Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"Advanced Computing Center","institution_ids":["https://openalex.org/I4210143490"]},{"raw_affiliation_string":"Texas Adv. Comput. Center, Univ. of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078797277","display_name":"Mar\u00eda Esteva","orcid":"https://orcid.org/0000-0001-6204-4517"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]},{"id":"https://openalex.org/I4210143490","display_name":"Computing Center","ror":"https://ror.org/0557kgc34","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210143490","https://openalex.org/I4210148470"]}],"countries":["RU","US"],"is_corresponding":false,"raw_author_name":"Maria Esteva","raw_affiliation_strings":["Advanced Computing Center","Texas Adv. Comput. Center, Univ. of Texas at Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"Advanced Computing Center","institution_ids":["https://openalex.org/I4210143490"]},{"raw_affiliation_string":"Texas Adv. Comput. Center, Univ. of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075563922","display_name":"Jessica Trelogan","orcid":"https://orcid.org/0000-0002-5932-0696"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jessica Trelogan","raw_affiliation_strings":["Institute of Classical Archaeology, University of Texas at Austin","Inst. of Classical Archaeology, Univ. of Texas at Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"Institute of Classical Archaeology, University of Texas at Austin","institution_ids":[]},{"raw_affiliation_string":"Inst. of Classical Archaeology, Univ. of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031064682","display_name":"Todd Swinson","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Todd Swinson","raw_affiliation_strings":["Department of Computer Sciences","Department of Computer Science, University of Texas at Austin, Austin, TX, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Sciences","institution_ids":[]},{"raw_affiliation_string":"Department of Computer Science, University of Texas at Austin, Austin, TX, USA#TAB#","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101883722"],"corresponding_institution_ids":["https://openalex.org/I4210143490","https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":0.411,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.68918076,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"1","issue":null,"first_page":"113","last_page":"120"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7777457237243652},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.6960595846176147},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.5880920886993408},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.49425792694091797},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.4927082359790802},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.48108264803886414},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.47506850957870483},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.46373453736305237},{"id":"https://openalex.org/keywords/metadata-management","display_name":"Metadata management","score":0.41263070702552795},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.4112417995929718},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4105564057826996},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.30032801628112793}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7777457237243652},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.6960595846176147},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.5880920886993408},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.49425792694091797},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.4927082359790802},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.48108264803886414},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.47506850957870483},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.46373453736305237},{"id":"https://openalex.org/C2779489174","wikidata":"https://www.wikidata.org/wiki/Q6822246","display_name":"Metadata management","level":3,"score":0.41263070702552795},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.4112417995929718},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4105564057826996},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.30032801628112793},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2013.6691678","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2013.6691678","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE International Conference on Big Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W156995665","https://openalex.org/W561535798","https://openalex.org/W1547556487","https://openalex.org/W1547612978","https://openalex.org/W1596642260","https://openalex.org/W1606013962","https://openalex.org/W1966356743","https://openalex.org/W1981590391","https://openalex.org/W1990028388","https://openalex.org/W1993081718","https://openalex.org/W1995099886","https://openalex.org/W2003701321","https://openalex.org/W2016281502","https://openalex.org/W2017102965","https://openalex.org/W2081154279","https://openalex.org/W2086485446","https://openalex.org/W2095244036","https://openalex.org/W2108991785","https://openalex.org/W2148019918","https://openalex.org/W2149827033","https://openalex.org/W2887774225","https://openalex.org/W3012544343","https://openalex.org/W3125261728","https://openalex.org/W3146259567","https://openalex.org/W3147597245","https://openalex.org/W4244181777","https://openalex.org/W4302802341","https://openalex.org/W6606379709","https://openalex.org/W6636351412","https://openalex.org/W6754271844","https://openalex.org/W6775717076"],"related_works":["https://openalex.org/W1487702188","https://openalex.org/W2374913837","https://openalex.org/W2460834149","https://openalex.org/W1512114296","https://openalex.org/W2354316773","https://openalex.org/W2376038160","https://openalex.org/W4205875026","https://openalex.org/W2389651964","https://openalex.org/W3181164120","https://openalex.org/W2001042954"],"abstract_inverted_index":{"At":[0],"the":[1,7,68,124,138,149,153,163,174],"forefront":[2],"of":[3,71,102,123,165],"big":[4],"data":[5,22,40,61,75,109,133,180],"in":[6,39,58],"Humanities,":[8],"collections":[9,14],"management":[10,23,62,134,181],"can":[11,42],"directly":[12],"impact":[13],"access":[15],"and":[16,33,66,92,99,112,126,147,167],"reuse.":[17],"However,":[18],"curators":[19,57],"using":[20],"traditional":[21],"methods":[24],"for":[25,64,178],"tasks":[26],"such":[27],"as":[28,80],"identifying":[29],"redundant":[30],"from":[31],"relevant":[32],"related":[34,104,166],"records,":[35],"a":[36,81,86,115,128,144],"small":[37],"increase":[38,44],"volume":[41],"significantly":[43],"their":[45,96],"workload.":[46],"In":[47],"this":[48],"paper,":[49],"we":[50,84,141],"present":[51],"preliminary":[52],"work":[53],"aimed":[54],"at":[55],"assisting":[56],"making":[59,179],"important":[60],"decisions":[63],"organizing":[65],"improving":[67],"overall":[69],"quality":[70],"large":[72],"unstructured":[73],"Humanities":[74],"collections.":[76],"Using":[77],"Entity":[78],"Resolution":[79],"conceptual":[82],"framework,":[83],"created":[85],"similarity":[87],"model":[88,158,175],"that":[89,119,161,173],"compares":[90],"directories":[91],"files":[93],"based":[94],"on":[95,132],"implicit":[97],"metadata,":[98],"clusters":[100,125,154],"pairs":[101],"closely":[103],"directories.":[105],"Useful":[106],"relationships":[107],"between":[108],"are":[110],"identified":[111],"presented":[113],"through":[114],"graphical":[116],"user":[117],"interface":[118],"allows":[120],"qualitative":[121],"evaluation":[122],"provides":[127],"guide":[129],"to":[130,151,156],"decide":[131],"actions.":[135],"To":[136],"evaluate":[137],"model's":[139],"performance,":[140],"experimented":[142],"with":[143],"test":[145],"collection":[146],"asked":[148],"curator":[150],"classify":[152],"according":[155],"four":[157],"cluster":[159],"configurations":[160],"consider":[162],"presence":[164],"duplicate":[168],"information.":[169],"Evaluation":[170],"results":[171],"suggest":[172],"is":[176],"useful":[177],"action":[182],"decisions.":[183]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
