{"id":"https://openalex.org/W4205961906","doi":"https://doi.org/10.1109/bigdata52589.2021.9671414","title":"Retrieving of Data Similarity using Metadata on a Data Analysis Competition Platform","display_name":"Retrieving of Data Similarity using Metadata on a Data Analysis Competition Platform","publication_year":2021,"publication_date":"2021-12-15","ids":{"openalex":"https://openalex.org/W4205961906","doi":"https://doi.org/10.1109/bigdata52589.2021.9671414"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata52589.2021.9671414","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671414","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028823648","display_name":"Hiroki Sakaji","orcid":"https://orcid.org/0000-0001-5030-625X"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Hiroki Sakaji","raw_affiliation_strings":["Department of Systems Innovation, The University of Tokyo, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Systems Innovation, The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015928651","display_name":"Teruaki Hayashi","orcid":"https://orcid.org/0000-0002-1806-5852"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Teruaki Hayashi","raw_affiliation_strings":["Department of Systems Innovation, The University of Tokyo, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Systems Innovation, The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085809235","display_name":"Yoshiaki Fukami","orcid":"https://orcid.org/0000-0002-7838-8215"},"institutions":[{"id":"https://openalex.org/I207169309","display_name":"Cyber University","ror":"https://ror.org/038sdcw17","country_code":"JP","type":"education","lineage":["https://openalex.org/I207169309"]},{"id":"https://openalex.org/I203951103","display_name":"Keio University","ror":"https://ror.org/02kn6nx58","country_code":"JP","type":"education","lineage":["https://openalex.org/I203951103"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshiaki Fukami","raw_affiliation_strings":["Cyber Civilization Research Center, Keio University, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Cyber Civilization Research Center, Keio University, Tokyo, Japan","institution_ids":["https://openalex.org/I207169309","https://openalex.org/I203951103"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017231364","display_name":"Takumi Shimizu","orcid":"https://orcid.org/0000-0002-9968-4072"},"institutions":[{"id":"https://openalex.org/I203951103","display_name":"Keio University","ror":"https://ror.org/02kn6nx58","country_code":"JP","type":"education","lineage":["https://openalex.org/I203951103"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takumi Shimizu","raw_affiliation_strings":["Faculty of Policy Management, Keio University, Kanagawa, Japan"],"affiliations":[{"raw_affiliation_string":"Faculty of Policy Management, Keio University, Kanagawa, Japan","institution_ids":["https://openalex.org/I203951103"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088242177","display_name":"Hiroyasu Matsushima","orcid":"https://orcid.org/0000-0001-7301-1956"},"institutions":[{"id":"https://openalex.org/I171494771","display_name":"Shiga University","ror":"https://ror.org/01vvhy971","country_code":"JP","type":"education","lineage":["https://openalex.org/I171494771"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hiroyasu Matsushima","raw_affiliation_strings":["Center for Data Science Education and Research, Shiga University, Shiga, Japan"],"affiliations":[{"raw_affiliation_string":"Center for Data Science Education and Research, Shiga University, Shiga, Japan","institution_ids":["https://openalex.org/I171494771"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044205949","display_name":"Kiyoshi Izumi","orcid":"https://orcid.org/0000-0003-0870-7310"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kiyoshi Izumi","raw_affiliation_strings":["Department of Systems Innovation, The University of Tokyo, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Systems Innovation, The University of Tokyo, Tokyo, Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5028823648"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":2.133,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.89454806,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3480","last_page":"3485"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8347935676574707},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7801991701126099},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.5743353366851807},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5637328624725342},{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.5440144538879395},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4673595428466797},{"id":"https://openalex.org/keywords/usability","display_name":"Usability","score":0.44147005677223206},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.42089104652404785},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.41531887650489807},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3540220856666565},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2807806134223938},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.20869943499565125},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.16284817457199097},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.08738505840301514}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8347935676574707},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7801991701126099},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.5743353366851807},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5637328624725342},{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.5440144538879395},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4673595428466797},{"id":"https://openalex.org/C170130773","wikidata":"https://www.wikidata.org/wiki/Q216378","display_name":"Usability","level":2,"score":0.44147005677223206},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.42089104652404785},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.41531887650489807},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3540220856666565},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2807806134223938},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.20869943499565125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.16284817457199097},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.08738505840301514},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata52589.2021.9671414","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671414","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W123941891","https://openalex.org/W1557929622","https://openalex.org/W1566018662","https://openalex.org/W1567769573","https://openalex.org/W1584739173","https://openalex.org/W1614298861","https://openalex.org/W1970842529","https://openalex.org/W1978697738","https://openalex.org/W1983578042","https://openalex.org/W2012179416","https://openalex.org/W2022470916","https://openalex.org/W2066422409","https://openalex.org/W2121184547","https://openalex.org/W2137607259","https://openalex.org/W2165897980","https://openalex.org/W2216189112","https://openalex.org/W2250351693","https://openalex.org/W2251427843","https://openalex.org/W2260378577","https://openalex.org/W2508865106","https://openalex.org/W2735637226","https://openalex.org/W2896457183","https://openalex.org/W2911795772","https://openalex.org/W2957347123","https://openalex.org/W2970641574","https://openalex.org/W2991099293","https://openalex.org/W2999392930","https://openalex.org/W3008423135","https://openalex.org/W3011689050","https://openalex.org/W3147657411","https://openalex.org/W3148390575","https://openalex.org/W4254304201","https://openalex.org/W6633661181","https://openalex.org/W6634167773","https://openalex.org/W6635121792","https://openalex.org/W6636510571","https://openalex.org/W6680585821","https://openalex.org/W6691735759","https://openalex.org/W6725015247","https://openalex.org/W6755207826","https://openalex.org/W6767552790","https://openalex.org/W6793437143"],"related_works":["https://openalex.org/W2980729574","https://openalex.org/W1560851690","https://openalex.org/W3092047717","https://openalex.org/W3110772647","https://openalex.org/W2770162183","https://openalex.org/W2894231409","https://openalex.org/W2947721150","https://openalex.org/W2995297654","https://openalex.org/W3210334372","https://openalex.org/W4287385180"],"abstract_inverted_index":{"In":[0,95,122],"recent":[1],"years,":[2],"instead":[3],"of":[4,32,74,92,105,111,139,163,195],"closing":[5],"data":[6,19,26,37,41,60,112,115,131,153,186,204],"and":[7,44,56,76,78,90,107,146,151],"analysis":[8,20],"skills":[9],"in-house,":[10],"there":[11,79],"has":[12,180],"been":[13],"much":[14],"interest":[15],"in":[16],"widely":[17],"releasing":[18],"knowledge":[21],"on":[22,50,71,100,113],"the":[23,47,59,72,87,109,127,130,137,140,161,181,190,193,196],"web.":[24],"A":[25],"exchange":[27,116,132,187],"platform":[28,34,117,133],"is":[29,80],"a":[30,101,114],"type":[31],"digital":[33],"that":[35,178],"exchanges":[36],"between":[38,170],"stakeholders,":[39],"e.g.,":[40],"owners,":[42],"users,":[43],"analysts.":[45],"However,":[46],"datasets":[48,67],"handled":[49],"such":[51],"platforms":[52],"are":[53,68],"independently":[54],"acquired":[55],"stored":[57],"by":[58,166],"providers":[61],"for":[62,201],"their":[63],"own":[64],"purposes.":[65],"These":[66],"not":[69],"based":[70],"premise":[73],"coordination":[75],"combination,":[77],"currently":[81],"little":[82],"information":[83,104],"available":[84],"to":[85,155],"discuss":[86],"systematic":[88],"organization":[89],"combination":[91],"these":[93],"datasets.":[94],"this":[96],"study,":[97],"we":[98,125,176],"focus":[99],"metadata,":[102],"summary":[103],"data,":[106,141],"examine":[108],"similarity":[110,138],"using":[118],"natural":[119,197],"language":[120,198],"processing.":[121],"our":[123,142,158],"experiments,":[124],"use":[126],"metadata":[128],"from":[129],"Kaggle.":[134],"To":[135],"compare":[136],"method":[143,159,200],"employs":[144],"word2vec":[145],"BERT":[147],"as":[148,184],"vectorize":[149],"methods":[150],"converts":[152],"descriptions":[154],"vectors.":[156],"Then,":[157],"measures":[160],"distances":[162],"each":[164,171],"vector":[165],"calculating":[167],"cosine":[168],"similarities":[169],"vector.":[172],"From":[173],"experimental":[174],"results,":[175],"found":[177],"Kaggle":[179],"same":[182],"character":[183],"other":[185],"platforms.":[188],"Additionally,":[189],"results":[191],"indicated":[192],"usability":[194],"processing-based":[199],"extracting":[202],"similar":[203],"pairs.":[205]},"counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
