{"id":"https://openalex.org/W2585043100","doi":"https://doi.org/10.1109/bigdata.2016.7840987","title":"Content-based comparison for collections identification","display_name":"Content-based comparison for collections identification","publication_year":2016,"publication_date":"2016-12-01","ids":{"openalex":"https://openalex.org/W2585043100","doi":"https://doi.org/10.1109/bigdata.2016.7840987","mag":"2585043100"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2016.7840987","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7840987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101883722","display_name":"Weijia Xu","orcid":"https://orcid.org/0000-0002-5134-6381"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Weijia Xu","raw_affiliation_strings":["University of Texas at Austin, Austin, TX, US"],"affiliations":[{"raw_affiliation_string":"University of Texas at Austin, Austin, TX, US","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057218817","display_name":"Ruizhu Huang","orcid":"https://orcid.org/0000-0003-3285-1945"},"institutions":[{"id":"https://openalex.org/I4388891828","display_name":"Texas Advanced Computing Center","ror":"https://ror.org/00xg4bh43","country_code":null,"type":"facility","lineage":["https://openalex.org/I4388891828","https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruizhu Huang","raw_affiliation_strings":["Texas Advanced Computing Center, University of Texas, Austin"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, University of Texas, Austin","institution_ids":["https://openalex.org/I4388891828"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078797277","display_name":"Mar\u00eda Esteva","orcid":"https://orcid.org/0000-0001-6204-4517"},"institutions":[{"id":"https://openalex.org/I4388891828","display_name":"Texas Advanced Computing Center","ror":"https://ror.org/00xg4bh43","country_code":null,"type":"facility","lineage":["https://openalex.org/I4388891828","https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maria Esteva","raw_affiliation_strings":["Texas Advanced Computing Center, University of Texas, Austin"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, University of Texas, Austin","institution_ids":["https://openalex.org/I4388891828"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090962581","display_name":"Jawon Song","orcid":"https://orcid.org/0000-0002-9916-3816"},"institutions":[{"id":"https://openalex.org/I4388891828","display_name":"Texas Advanced Computing Center","ror":"https://ror.org/00xg4bh43","country_code":null,"type":"facility","lineage":["https://openalex.org/I4388891828","https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jawon Song","raw_affiliation_strings":["Texas Advanced Computing Center, University of Texas, Austin"],"affiliations":[{"raw_affiliation_string":"Texas Advanced Computing Center, University of Texas, Austin","institution_ids":["https://openalex.org/I4388891828"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015253000","display_name":"Ramona Walls","orcid":"https://orcid.org/0000-0001-8815-0078"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramona Walls","raw_affiliation_strings":["Cyverse.org"],"affiliations":[{"raw_affiliation_string":"Cyverse.org","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101883722"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":1.7725,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.91333631,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"3283","last_page":"3289"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12859","display_name":"Cell Image Analysis Techniques","score":0.9520000219345093,"subfield":{"id":"https://openalex.org/subfields/1304","display_name":"Biophysics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.8704588413238525},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8108655214309692},{"id":"https://openalex.org/keywords/operationalization","display_name":"Operationalization","score":0.6870434284210205},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6828104853630066},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.5839086174964905},{"id":"https://openalex.org/keywords/service","display_name":"Service (business)","score":0.5710645318031311},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.5294910073280334},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.49589166045188904},{"id":"https://openalex.org/keywords/unique-identifier","display_name":"Unique identifier","score":0.43879765272140503},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4324420690536499},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.4131547808647156},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3287811279296875}],"concepts":[{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.8704588413238525},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8108655214309692},{"id":"https://openalex.org/C9354725","wikidata":"https://www.wikidata.org/wiki/Q286017","display_name":"Operationalization","level":2,"score":0.6870434284210205},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6828104853630066},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5839086174964905},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.5710645318031311},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.5294910073280334},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.49589166045188904},{"id":"https://openalex.org/C119839945","wikidata":"https://www.wikidata.org/wiki/Q6545185","display_name":"Unique identifier","level":3,"score":0.43879765272140503},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4324420690536499},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.4131547808647156},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3287811279296875},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2016.7840987","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7840987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W56820923","https://openalex.org/W142265196","https://openalex.org/W2055043387","https://openalex.org/W2063556310","https://openalex.org/W2079796819","https://openalex.org/W2092020460","https://openalex.org/W2093931624","https://openalex.org/W2114850508","https://openalex.org/W2115567294","https://openalex.org/W2132052897","https://openalex.org/W2148659804","https://openalex.org/W2150457970","https://openalex.org/W2152955481","https://openalex.org/W2189465200","https://openalex.org/W2219660178","https://openalex.org/W2293751269","https://openalex.org/W4236236547","https://openalex.org/W6602390348","https://openalex.org/W6605779593","https://openalex.org/W6687322159"],"related_works":["https://openalex.org/W2413568490","https://openalex.org/W1913624564","https://openalex.org/W3130054399","https://openalex.org/W4301062032","https://openalex.org/W2319837024","https://openalex.org/W2343500026","https://openalex.org/W2390627310","https://openalex.org/W2413664130","https://openalex.org/W2051731675","https://openalex.org/W2033159440"],"abstract_inverted_index":{"Assigning":[0],"global":[1],"unique":[2],"persistent":[3,39,91],"identifiers":[4,40],"(GUPIs)":[5],"to":[6,38,50,86,110],"datasets":[7,36,135],"has":[8],"the":[9,33,51,95,98,101,127,132,143],"goal":[10],"of":[11,35,53,97,134,139],"improving":[12],"their":[13],"accessibility":[14],"and":[15,21,28,64,89,108,136,147,149],"simplifying":[16],"how":[17],"they":[18],"are":[19],"referenced":[20],"reused.":[22],"However,":[23],"as":[24,82],"repositories":[25],"receive":[26],"more":[27,45],"complex":[29],"data,":[30,56],"attesting":[31],"for":[32,78,114],"identity":[34,109,133],"attached":[37],"over":[41],"time":[42],"is":[43,48,58,117],"becoming":[44],"challenging.":[46],"This":[47,70],"due":[49],"nature":[52],"scientific":[54],"research":[55,62],"which":[57,126],"generated":[59],"through":[60,119],"distributed":[61],"practices":[63],"evolves":[65],"across":[66],"different":[67],"computational":[68,76],"environments.":[69],"work":[71],"presents":[72],"a":[73,83],"robust,":[74],"automated":[75],"service":[77,99,116],"data":[79,106,122],"content":[80],"comparison":[81],"valuable":[84],"addition":[85],"assigning,":[87],"managing,":[88],"tracking":[90],"identifiers.":[92],"We":[93,141],"operationalized":[94],"functions":[96],"within":[100],"archival":[102],"space":[103],"by":[104],"linking":[105],"provenance":[107],"authenticity.":[111],"The":[112],"need":[113],"such":[115],"shown":[118],"three":[120],"genomics":[121],"use":[123],"cases":[124],"in":[125],"results":[128],"aided":[129],"curators":[130],"establishing":[131],"inferring":[137],"issues":[138],"provenance.":[140],"describe":[142],"system's":[144],"design,":[145],"implementation":[146],"performance,":[148],"report":[150],"on":[151],"lessons":[152],"learned.":[153]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
