{"id":"https://openalex.org/W3138573002","doi":"https://doi.org/10.1109/bigdata50022.2020.9378312","title":"Learning Similarity-Preserving Meta-Embedding for Text Mining","display_name":"Learning Similarity-Preserving Meta-Embedding for Text Mining","publication_year":2020,"publication_date":"2020-12-10","ids":{"openalex":"https://openalex.org/W3138573002","doi":"https://doi.org/10.1109/bigdata50022.2020.9378312","mag":"3138573002"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata50022.2020.9378312","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378312","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033643173","display_name":"Jidapa Thadajarassiri","orcid":null},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jidapa Thadajarassiri","raw_affiliation_strings":["Data Science Program, Worcester Polytechnic Institute, Worcester, USA"],"affiliations":[{"raw_affiliation_string":"Data Science Program, Worcester Polytechnic Institute, Worcester, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032316925","display_name":"Cansu \u015een","orcid":"https://orcid.org/0000-0003-3355-2736"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cansu Sen","raw_affiliation_strings":["Computer Science Department, Worcester Polytechnic Institute, Worcester, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Worcester Polytechnic Institute, Worcester, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075881948","display_name":"Thomas Hartvigsen","orcid":"https://orcid.org/0000-0002-5288-2792"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas Hartvigsen","raw_affiliation_strings":["Data Science Program, Worcester Polytechnic Institute, Worcester, USA"],"affiliations":[{"raw_affiliation_string":"Data Science Program, Worcester Polytechnic Institute, Worcester, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002930471","display_name":"Xiangnan Kong","orcid":"https://orcid.org/0000-0002-7403-5869"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiangnan Kong","raw_affiliation_strings":["Computer Science Department, Worcester Polytechnic Institute, Worcester, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Worcester Polytechnic Institute, Worcester, USA","institution_ids":["https://openalex.org/I107077323"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008269094","display_name":"Elke A. Rundensteiner","orcid":"https://orcid.org/0000-0001-5375-9254"},"institutions":[{"id":"https://openalex.org/I107077323","display_name":"Worcester Polytechnic Institute","ror":"https://ror.org/05ejpqr48","country_code":"US","type":"education","lineage":["https://openalex.org/I107077323"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elke Rundensteiner","raw_affiliation_strings":["Computer Science Department, Worcester Polytechnic Institute, Worcester, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Worcester Polytechnic Institute, Worcester, USA","institution_ids":["https://openalex.org/I107077323"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5033643173"],"corresponding_institution_ids":["https://openalex.org/I107077323"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.21658958,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"12","issue":null,"first_page":"808","last_page":"817"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7963857650756836},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.726028323173523},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7229434251785278},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6564494371414185},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6363793611526489},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5949215292930603},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.545591413974762},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.542201042175293},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.5083410143852234},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.5055782794952393},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4836500883102417},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.4814954996109009},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4194898009300232},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.41660216450691223},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4009021520614624},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.27940571308135986},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1488114297389984},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.12756618857383728},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09483000636100769}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7963857650756836},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.726028323173523},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7229434251785278},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6564494371414185},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6363793611526489},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5949215292930603},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.545591413974762},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.542201042175293},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.5083410143852234},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.5055782794952393},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4836500883102417},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.4814954996109009},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4194898009300232},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.41660216450691223},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4009021520614624},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27940571308135986},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1488114297389984},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.12756618857383728},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09483000636100769},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata50022.2020.9378312","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378312","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1576954243","https://openalex.org/W1614298861","https://openalex.org/W1849368448","https://openalex.org/W1854884267","https://openalex.org/W2067438047","https://openalex.org/W2081580037","https://openalex.org/W2085750684","https://openalex.org/W2091812280","https://openalex.org/W2114524997","https://openalex.org/W2117130368","https://openalex.org/W2135964261","https://openalex.org/W2153579005","https://openalex.org/W2158139315","https://openalex.org/W2158899491","https://openalex.org/W2163455955","https://openalex.org/W2164019165","https://openalex.org/W2187089797","https://openalex.org/W2250539671","https://openalex.org/W2251012068","https://openalex.org/W2251348291","https://openalex.org/W2251803266","https://openalex.org/W2251874715","https://openalex.org/W2514776376","https://openalex.org/W2527896214","https://openalex.org/W2606542855","https://openalex.org/W2741613777","https://openalex.org/W2785447792","https://openalex.org/W2787481916","https://openalex.org/W2853138162","https://openalex.org/W2913433659","https://openalex.org/W2944400536","https://openalex.org/W2950577311","https://openalex.org/W2952230511","https://openalex.org/W2954699761","https://openalex.org/W2962772361","https://openalex.org/W2962808042","https://openalex.org/W2963087041","https://openalex.org/W2963366649","https://openalex.org/W2963850626","https://openalex.org/W2963923670","https://openalex.org/W2964238876","https://openalex.org/W2964921277","https://openalex.org/W2972461241","https://openalex.org/W3006845068","https://openalex.org/W3007787648","https://openalex.org/W4288620981","https://openalex.org/W4294170691","https://openalex.org/W4294367149","https://openalex.org/W6634360984","https://openalex.org/W6636510571","https://openalex.org/W6639247546","https://openalex.org/W6679988210","https://openalex.org/W6682691769","https://openalex.org/W6683557909","https://openalex.org/W6683738474","https://openalex.org/W6684165356","https://openalex.org/W6691746754","https://openalex.org/W6728098186","https://openalex.org/W6747248625","https://openalex.org/W6748355100","https://openalex.org/W6753139071","https://openalex.org/W6758543384"],"related_works":["https://openalex.org/W2468279273","https://openalex.org/W2354198838","https://openalex.org/W2353179089","https://openalex.org/W1989130879","https://openalex.org/W2103419012","https://openalex.org/W2923538289","https://openalex.org/W2353125546","https://openalex.org/W2187606256","https://openalex.org/W2275988210","https://openalex.org/W4320719010"],"abstract_inverted_index":{"Publicly":[0],"available":[1],"pre-trained":[2,31,163],"word":[3,101],"embeddings":[4],"are":[5,74,120,156,171],"rich":[6],"sources":[7,33,61,73,90,191],"for":[8,23,45,99],"turning":[9],"critical":[10],"high-dimensional":[11],"representations":[12],"of":[13,29,41,112],"huge":[14],"text":[15,24],"data":[16],"repositories":[17],"into":[18,62],"meaningful":[19],"compact":[20],"vectors":[21],"essential":[22],"mining":[25,216],"applications.":[26],"With":[27],"many":[28,89],"such":[30,152],"embedding":[32,60,106,147],"available,":[34],"each":[35,100,118],"faces":[36],"limitations":[37],"in":[38,82,161,174,213],"the":[39,46,86,186,190],"appropriateness":[40],"their":[42,154],"language":[43],"use":[44],"downstream":[47],"text-mining":[48],"tasks.":[49],"Meta-embeddings":[50],"aim":[51],"to":[52,85,189,207],"tackle":[53],"this":[54,125],"ambiguity":[55],"challenge":[56],"by":[57,102,200],"fusing":[58],"multiple":[59,162],"one":[63],"feature":[64],"space.":[65],"However,":[66],"current":[67],"meta-embedding":[68,98],"methods":[69,95,199],"assume":[70],"vocabularies":[71],"across":[72,209],"similar":[75],"or":[76],"even":[77],"identical;":[78],"which":[79],"unfortunately":[80],"stands":[81],"sharp":[83],"contrast":[84],"fact":[87],"that":[88,139,153,170,184],"barely":[91],"overlap.":[92],"Further,":[93],"these":[94],"encode":[96],"a":[97,130,180],"reconstructing":[103],"its":[104],"actual":[105],"values":[107],"(word-encoder),":[108],"while":[109],"valuable":[110],"information":[111],"relationships":[113,143],"(distances)":[114],"among":[115],"words":[116,151,169],"within":[117],"source":[119],"not":[121,172],"directly":[122,140],"considered.":[123],"In":[124],"work,":[126],"we":[127,177],"instead":[128],"propose":[129],"novel":[131],"relation-encoder":[132],"learning":[133,187],"approach":[134],"called":[135],"Similarity-Preserving":[136],"Meta-Embedding":[137],"(SimME)":[138],"integrates":[141],"word-pair":[142],"from":[144,158],"partially":[145],"overlapping":[146],"sources.":[148,164],"SimME":[149,195],"embeds":[150],"similarities":[155],"learned":[157],"those":[159],"observed":[160],"To":[165],"handle":[166],"relations":[167],"between":[168],"present":[173],"all":[175],"sources,":[176],"introduce":[178],"maskout,":[179],"new":[181],"loss":[182],"term,":[183],"steers":[185],"selectively":[188],"containing":[192],"said":[193],"relations.":[194],"consistently":[196],"outperforms":[197],"state-of-the-art":[198],"10%":[201],"on":[202,218],"average":[203],"and":[204],"with":[205],"up":[206],"20%":[208],"several":[210],"core":[211],"metrics":[212],"4":[214],"popular":[215],"tasks":[217],"23":[219],"datasets.":[220]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
