{"id":"https://openalex.org/W4406458741","doi":"https://doi.org/10.1109/bigdata62323.2024.10825245","title":"Metadata-less Dataset Recommendation Leveraging Dataset Embeddings by Pre-trained Tabular Language Models","display_name":"Metadata-less Dataset Recommendation Leveraging Dataset Embeddings by Pre-trained Tabular Language Models","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406458741","doi":"https://doi.org/10.1109/bigdata62323.2024.10825245"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825245","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825245","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110680431","display_name":"K. Manabe","orcid":null},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Kosuke Manabe","raw_affiliation_strings":["The University of Tokyo,School of Engineering,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,School of Engineering,Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043268072","display_name":"Yukihisa Fujita","orcid":"https://orcid.org/0000-0002-0581-5116"},"institutions":[{"id":"https://openalex.org/I4210137853","display_name":"Toyota Motor Corporation (Japan)","ror":"https://ror.org/02zqm6r10","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210125472","https://openalex.org/I4210137853"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yukihisa Fujita","raw_affiliation_strings":["Toyota Motor Corporation,Social System PF Development Division,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"Toyota Motor Corporation,Social System PF Development Division,Tokyo,Japan","institution_ids":["https://openalex.org/I4210137853"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049693274","display_name":"Masahiro Kuwahara","orcid":"https://orcid.org/0000-0003-2896-8622"},"institutions":[{"id":"https://openalex.org/I4210137853","display_name":"Toyota Motor Corporation (Japan)","ror":"https://ror.org/02zqm6r10","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210125472","https://openalex.org/I4210137853"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masahiro Kuwahara","raw_affiliation_strings":["Toyota Motor Corporation,Social System PF Development Division,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"Toyota Motor Corporation,Social System PF Development Division,Tokyo,Japan","institution_ids":["https://openalex.org/I4210137853"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015928651","display_name":"Teruaki Hayashi","orcid":"https://orcid.org/0000-0002-1806-5852"},"institutions":[{"id":"https://openalex.org/I74801974","display_name":"The University of Tokyo","ror":"https://ror.org/057zh3y96","country_code":"JP","type":"education","lineage":["https://openalex.org/I74801974"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Teruaki Hayashi","raw_affiliation_strings":["The University of Tokyo,School of Engineering,Tokyo,Japan"],"affiliations":[{"raw_affiliation_string":"The University of Tokyo,School of Engineering,Tokyo,Japan","institution_ids":["https://openalex.org/I74801974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5110680431"],"corresponding_institution_ids":["https://openalex.org/I74801974"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23736804,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6604","last_page":"6613"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.9107993841171265},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8725021481513977},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.588078498840332},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5121438503265381},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4077640175819397},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.37511658668518066},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35298484563827515}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.9107993841171265},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8725021481513977},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.588078498840332},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5121438503265381},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4077640175819397},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.37511658668518066},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35298484563827515}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825245","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825245","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322622","display_name":"Toyota Motor Corporation","ror":"https://ror.org/02zqm6r10"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W56775633","https://openalex.org/W2151549208","https://openalex.org/W2162020046","https://openalex.org/W2341748398","https://openalex.org/W2522934524","https://openalex.org/W2750779823","https://openalex.org/W2788550262","https://openalex.org/W2789631454","https://openalex.org/W2790400697","https://openalex.org/W2798818514","https://openalex.org/W2896457183","https://openalex.org/W2899286282","https://openalex.org/W2920913768","https://openalex.org/W2926805670","https://openalex.org/W2941366772","https://openalex.org/W2969723769","https://openalex.org/W2970641574","https://openalex.org/W3012644370","https://openalex.org/W3035140194","https://openalex.org/W3035231859","https://openalex.org/W3099700870","https://openalex.org/W3099839495","https://openalex.org/W3102264439","https://openalex.org/W3108941002","https://openalex.org/W3138414763","https://openalex.org/W3156636935","https://openalex.org/W3158303960","https://openalex.org/W4205922070","https://openalex.org/W4294558859","https://openalex.org/W4309505042","https://openalex.org/W4365456672","https://openalex.org/W4384648452","https://openalex.org/W4392667206","https://openalex.org/W6753529518","https://openalex.org/W6754417798","https://openalex.org/W6755207826","https://openalex.org/W6760568010","https://openalex.org/W6774314701","https://openalex.org/W6791882803","https://openalex.org/W6810253028","https://openalex.org/W6847198859","https://openalex.org/W6854475297","https://openalex.org/W7071593569"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2058118494","https://openalex.org/W2392768766","https://openalex.org/W2382021449","https://openalex.org/W2095118173","https://openalex.org/W2106424170","https://openalex.org/W1985426483","https://openalex.org/W2501188010","https://openalex.org/W4299935056","https://openalex.org/W2010935248"],"abstract_inverted_index":{"The":[0,95],"acceleration":[1],"of":[2,10,18,49,52,117],"data-driven":[3],"business":[4],"and":[5,55,85,137],"research":[6],"through":[7],"the":[8,16,28,50,112,115,118],"use":[9],"third-party":[11],"datasets":[12,66],"has":[13],"led":[14],"to":[15],"emergence":[17],"data":[19,24,135],"platforms":[20],"that":[21,99,107,133],"enable":[22],"cross-disciplinary":[23],"exchange,":[25],"thereby":[26],"increasing":[27],"need":[29],"for":[30,64],"dataset":[31,70],"recommendations.":[32],"In":[33],"this":[34],"climate,":[35],"traditional":[36],"retrieval":[37],"systems,":[38],"such":[39],"as":[40,126,128],"explanatory":[41],"information":[42,71],"in":[43,47,143],"metadata,":[44,131],"have":[45],"limitations":[46],"terms":[48],"reliability":[51],"metadata":[53,124,146],"descriptions":[54],"their":[56],"creation":[57],"cost.":[58],"This":[59],"study":[60],"proposes":[61],"a":[62],"method":[63],"recommending":[65],"by":[67],"leveraging":[68],"actual":[69],"without":[72,123],"relying":[73],"on":[74],"meta-data.":[75],"We":[76],"performed":[77,125,141],"two":[78],"metric":[79,87,101],"learning":[80,84,102],"methods,":[81],"unsupervised":[82],"contrastive":[83],"table-query":[86],"learning,":[88],"utilizing":[89],"pre-trained":[90],"tabular":[91],"language":[92],"models":[93],"(TaLMs).":[94],"experimental":[96],"results":[97],"suggest":[98],"these":[100],"methods":[103],"can":[104,139],"obtain":[105],"representations":[106],"are":[108,147],"more":[109],"consistent":[110],"with":[111,130],"labels":[113],"representing":[114],"topics":[116],"datasets.":[119],"Moreover,":[120],"our":[121],"embeddings":[122],"well":[127],"those":[129],"suggesting":[132],"appropriate":[134],"extraction":[136],"clustering":[138],"be":[140],"even":[142],"cases":[144],"where":[145],"sparse":[148],"or":[149],"incomplete.":[150]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
