{"id":"https://openalex.org/W3136755792","doi":"https://doi.org/10.1109/bigdata50022.2020.9377769","title":"Towards Tabular Embeddings, Training the Relational Models","display_name":"Towards Tabular Embeddings, Training the Relational Models","publication_year":2020,"publication_date":"2020-12-10","ids":{"openalex":"https://openalex.org/W3136755792","doi":"https://doi.org/10.1109/bigdata50022.2020.9377769","mag":"3136755792"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata50022.2020.9377769","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9377769","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071549682","display_name":"Rituparna Khan","orcid":null},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Rituparna Khan","raw_affiliation_strings":["Florida State University"],"affiliations":[{"raw_affiliation_string":"Florida State University","institution_ids":["https://openalex.org/I103163165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047938200","display_name":"Michael Gubanov","orcid":"https://orcid.org/0000-0002-1354-1215"},"institutions":[{"id":"https://openalex.org/I103163165","display_name":"Florida State University","ror":"https://ror.org/05g3dte14","country_code":"US","type":"education","lineage":["https://openalex.org/I103163165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Gubanov","raw_affiliation_strings":["Florida State University"],"affiliations":[{"raw_affiliation_string":"Florida State University","institution_ids":["https://openalex.org/I103163165"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5071549682"],"corresponding_institution_ids":["https://openalex.org/I103163165"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.21445927,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"5724","last_page":"5726"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7871168851852417},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.6090167164802551},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.5607873201370239},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.5526196360588074},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5195068120956421},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4711643159389496},{"id":"https://openalex.org/keywords/schema-matching","display_name":"Schema matching","score":0.45697152614593506},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4497891068458557},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4402920603752136},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.42806434631347656},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.41290023922920227},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3861278295516968},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37134209275245667},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.11032924056053162},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08513540029525757}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7871168851852417},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.6090167164802551},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.5607873201370239},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.5526196360588074},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5195068120956421},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4711643159389496},{"id":"https://openalex.org/C2777327318","wikidata":"https://www.wikidata.org/wiki/Q1408390","display_name":"Schema matching","level":3,"score":0.45697152614593506},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4497891068458557},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4402920603752136},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.42806434631347656},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.41290023922920227},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3861278295516968},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37134209275245667},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.11032924056053162},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08513540029525757},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata50022.2020.9377769","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9377769","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.550000011920929,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1964049590","https://openalex.org/W2003575867","https://openalex.org/W2008896880","https://openalex.org/W2066232799","https://openalex.org/W2092486351","https://openalex.org/W2108267498","https://openalex.org/W2153579005","https://openalex.org/W2165467455","https://openalex.org/W2171308734","https://openalex.org/W2400847108","https://openalex.org/W2407983160","https://openalex.org/W2604741280","https://openalex.org/W2613788219","https://openalex.org/W2616271757","https://openalex.org/W2784018913","https://openalex.org/W2784033700","https://openalex.org/W2798489864","https://openalex.org/W2896457183","https://openalex.org/W2899286282","https://openalex.org/W2914901005","https://openalex.org/W2948709946","https://openalex.org/W3031051334","https://openalex.org/W3102264439","https://openalex.org/W4294170691","https://openalex.org/W6713149151","https://openalex.org/W6713731013","https://openalex.org/W6736430708","https://openalex.org/W6755207826","https://openalex.org/W6891756950"],"related_works":["https://openalex.org/W1995622179","https://openalex.org/W4391160746","https://openalex.org/W1484111231","https://openalex.org/W1528218860","https://openalex.org/W1552543208","https://openalex.org/W2074396517","https://openalex.org/W2166963679","https://openalex.org/W2187269125","https://openalex.org/W1641615907","https://openalex.org/W3089231081"],"abstract_inverted_index":{"Correctly":[0],"identifying":[1],"the":[2,13,87,117,128,134,138,174,182,198,217],"semantic":[3],"label":[4],"of":[5,15],"a":[6,16,90,233],"table":[7],"column":[8],"(e.g.":[9,18],"artist":[10],")":[11],"or":[12],"domain":[14],"tuple":[17,194],"Song)":[19],"is":[20,53,67,89],"crucial":[21],"for":[22,104,147,213],"data":[23,30,35],"science":[24],"tasks,":[25],"such":[26,161,186],"as":[27,102,114,116,162],"schema":[28],"matching,":[29],"cleaning":[31],"and":[32,37,55,80,165,172,224,229],"discovery.":[33],"Existing":[34],"preparation":[36],"integration":[38],"systems":[39],"are":[40,144],"known":[41,69],"to":[42,47,70,84,97,127,208],"make":[43],"mistakes":[44],"that":[45],"need":[46],"be":[48],"corrected":[49],"by":[50],"humans,":[51],"which":[52],"labor-intensive":[54],"expensive":[56],"especially":[57],"at":[58],"scale":[59],"[19],":[60],"[20],":[61],"[25],":[62],"[9],":[63],"[8].":[64],"The":[65],"accuracy":[66,191],"also":[68],"suffer":[71],"on":[72,156,159],"dirty":[73],"data.":[74],"In":[75],"this":[76],"paper":[77],"we":[78,231],"define":[79],"evaluate":[81],"tabular":[82,179,204],"embeddings":[83,111,121,135,143,180,205,228],"help":[85],"increase":[86],"accuracy.Embeddings,":[88],"well-known":[91],"dimensionality":[92],"reduction":[93],"technique,":[94],"usually":[95],"applied":[96],"represent":[98],"(one-dimensional)":[99],"text":[100],"documents/sentences":[101],"vectors":[103,136],"further":[105],"analytics":[106],"[28],":[107],"[7],":[108],"[23].":[109],"Using":[110],"lowers":[112],"dimensionality,":[113],"well":[115],"models":[118],"trained":[119,130,177,184,201],"with":[120,178,202,216],"often":[122],"exhibit":[123],"higher":[124],"accuracy,":[125],"compared":[126,215],"same":[129,183,218],"without":[131,185,219],"such,":[132],"because":[133],"store":[137],"context":[139],"information.":[140],"Our":[141],"2-dimensional":[142],"more":[145],"suitable":[146],"tables":[148,160,240],"rather":[149],"than":[150],"text.":[151],"We":[152,188],"justify":[153],"their":[154],"efficiency":[155],"fundamental":[157],"tasks":[158],"classifying":[163],"columns":[164],"tuples.We":[166],"perform":[167],"an":[168],"extensive":[169],"experimental":[170],"evaluation":[171],"compare":[173],"Neural":[175,199],"Network":[176,200],"against":[181],"embeddings.":[187,221],"report":[189],"significant":[190],"gains":[192],"in":[193,211],"classification":[195],"when":[196],"using":[197],"our":[203,220,227],"-":[206],"up":[207],"17.6%":[209],"delta":[210],"F-measure":[212],"Songs":[214],"For":[222],"training":[223],"evaluating":[225],"all":[226],"models,":[230],"use":[232],"large-scale":[234],"WebTables":[235],"dataset":[236],"having":[237],"\u224815":[238],"million":[239],"coming":[241],"from":[242],"\u2248":[243],"248K":[244],"English":[245],"Web":[246],"sources":[247],"[12].":[248]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
