{"id":"https://openalex.org/W2912231714","doi":"https://doi.org/10.1109/bigdata.2018.8622345","title":"Clinical Text Classification with Word Embedding Features vs. Bag-of-Words Features","display_name":"Clinical Text Classification with Word Embedding Features vs. Bag-of-Words Features","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2912231714","doi":"https://doi.org/10.1109/bigdata.2018.8622345","mag":"2912231714"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2018.8622345","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622345","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084025490","display_name":"Yijun Shao","orcid":"https://orcid.org/0000-0001-6419-7963"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yijun Shao","raw_affiliation_strings":["Biomedical Informatics Center, George Washington University, Washington, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Biomedical Informatics Center, George Washington University, Washington, USA","institution_ids":["https://openalex.org/I193531525"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055338736","display_name":"Stephanie L. Taylor","orcid":"https://orcid.org/0000-0002-3266-1132"},"institutions":[{"id":"https://openalex.org/I4210089489","display_name":"Health Services Research & Development","ror":"https://ror.org/0083hz885","country_code":"US","type":"facility","lineage":["https://openalex.org/I1322918889","https://openalex.org/I2799886695","https://openalex.org/I4210089489"]},{"id":"https://openalex.org/I4210156876","display_name":"VA Greater Los Angeles Healthcare System","ror":"https://ror.org/05xcarb80","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1322918889","https://openalex.org/I2799886695","https://openalex.org/I4210138663","https://openalex.org/I4210156876"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephanie Taylor","raw_affiliation_strings":["Health Services Research and Development, VA Greater Los Angeles Healthcare System, Los Angeles, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Health Services Research and Development, VA Greater Los Angeles Healthcare System, Los Angeles, USA","institution_ids":["https://openalex.org/I4210156876","https://openalex.org/I4210089489"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042612086","display_name":"Nell Marshall","orcid":"https://orcid.org/0000-0002-2145-0014"},"institutions":[{"id":"https://openalex.org/I204866599","display_name":"VA Palo Alto Health Care System","ror":"https://ror.org/00nr17z89","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1322918889","https://openalex.org/I204866599","https://openalex.org/I2799886695","https://openalex.org/I4210125474"]},{"id":"https://openalex.org/I4210089489","display_name":"Health Services Research & Development","ror":"https://ror.org/0083hz885","country_code":"US","type":"facility","lineage":["https://openalex.org/I1322918889","https://openalex.org/I2799886695","https://openalex.org/I4210089489"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nell Marshall","raw_affiliation_strings":["Health Services Research and Development, VA Palo Alto Health Care System, Palo Alto, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Health Services Research and Development, VA Palo Alto Health Care System, Palo Alto, USA","institution_ids":["https://openalex.org/I204866599","https://openalex.org/I4210089489"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039723520","display_name":"Craig A. Morioka","orcid":"https://orcid.org/0000-0002-3528-0001"},"institutions":[{"id":"https://openalex.org/I4210156876","display_name":"VA Greater Los Angeles Healthcare System","ror":"https://ror.org/05xcarb80","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1322918889","https://openalex.org/I2799886695","https://openalex.org/I4210138663","https://openalex.org/I4210156876"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Craig Morioka","raw_affiliation_strings":["Department of Radiology, VA Greater Los Angeles Healthcare System, Los Angeles, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Radiology, VA Greater Los Angeles Healthcare System, Los Angeles, USA","institution_ids":["https://openalex.org/I4210156876"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058747294","display_name":"Qing Zeng\u2010Treitler","orcid":"https://orcid.org/0000-0002-8353-7473"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qing Zeng-Treitler","raw_affiliation_strings":["Biomedical Informatics Center, George Washington University, Washington, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Biomedical Informatics Center, George Washington University, Washington, USA","institution_ids":["https://openalex.org/I193531525"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.5343,"has_fulltext":false,"cited_by_count":47,"citation_normalized_percentile":{"value":0.92003366,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2874","last_page":"2878"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.9622158408164978},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7654680609703064},{"id":"https://openalex.org/keywords/bag-of-words-model","display_name":"Bag-of-words model","score":0.666404128074646},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.6647305488586426},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6528576612472534},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6305669546127319},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6249229907989502},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5314632654190063},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5059012770652771},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4821721911430359},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.47345679998397827},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3316522240638733},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11565107107162476}],"concepts":[{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.9622158408164978},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7654680609703064},{"id":"https://openalex.org/C13672336","wikidata":"https://www.wikidata.org/wiki/Q3460803","display_name":"Bag-of-words model","level":2,"score":0.666404128074646},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.6647305488586426},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6528576612472534},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6305669546127319},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6249229907989502},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5314632654190063},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5059012770652771},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4821721911430359},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.47345679998397827},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3316522240638733},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11565107107162476},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2018.8622345","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2018.8622345","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8600000143051147}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W168564468","https://openalex.org/W1832693441","https://openalex.org/W1839136742","https://openalex.org/W1849277567","https://openalex.org/W1880262756","https://openalex.org/W1965154800","https://openalex.org/W2025428542","https://openalex.org/W2105770604","https://openalex.org/W2114380216","https://openalex.org/W2131744502","https://openalex.org/W2152311353","https://openalex.org/W2153579005","https://openalex.org/W2153635508","https://openalex.org/W2163605009","https://openalex.org/W2273964271","https://openalex.org/W2290086728","https://openalex.org/W2367952659","https://openalex.org/W2405317410","https://openalex.org/W2467995757","https://openalex.org/W2510560885","https://openalex.org/W2763753716","https://openalex.org/W2786420686","https://openalex.org/W2919115771","https://openalex.org/W2949547296","https://openalex.org/W2963126915","https://openalex.org/W4239510810","https://openalex.org/W4248423645","https://openalex.org/W4285719527","https://openalex.org/W4294170691","https://openalex.org/W6639204139","https://openalex.org/W6639619044","https://openalex.org/W6679775712","https://openalex.org/W6682691769","https://openalex.org/W6684191040","https://openalex.org/W6694657242","https://openalex.org/W6719402147","https://openalex.org/W6725499126","https://openalex.org/W6736878346","https://openalex.org/W6748146847"],"related_works":["https://openalex.org/W2946409105","https://openalex.org/W2905749112","https://openalex.org/W2985392712","https://openalex.org/W3099354896","https://openalex.org/W4287599800","https://openalex.org/W3046869600","https://openalex.org/W3175524270","https://openalex.org/W4251594503","https://openalex.org/W3036348210","https://openalex.org/W4226479509"],"abstract_inverted_index":{"Word":[0],"embedding":[1,25],"motivated":[2],"by":[3],"deep":[4],"learning":[5,37],"have":[6,33],"shown":[7],"promising":[8],"results":[9,70,99],"over":[10],"traditional":[11,74],"bag-of-words":[12,75],"features":[13,58,84],"for":[14,59],"natural":[15],"language":[16],"processing.":[17],"When":[18],"trained":[19],"on":[20],"large":[21],"text":[22,64],"corpora,":[23],"word":[24],"methods":[26,32],"such":[27],"as":[28],"word2vec":[29,55,83],"and":[30,41,56,67],"doc2vec":[31,57],"the":[34,43,46,69,73,82,88],"advantage":[35],"of":[36,45,62],"from":[38],"unlabeled":[39],"data":[40],"reduce":[42],"dimension":[44],"feature":[47],"space.":[48],"In":[49],"this":[50],"study,":[51],"we":[52],"experimented":[53],"with":[54,71],"a":[60],"set":[61],"clinical":[63],"classification":[65],"tasks":[66],"compared":[68],"using":[72],"(BOW)":[76],"features.":[77,90],"The":[78],"study":[79],"showed":[80],"that":[81],"performed":[85],"better":[86],"than":[87],"BOW-1-gram":[89],"However,":[91],"when":[92],"2-grams":[93],"were":[94,100],"added":[95],"to":[96],"BOW,":[97],"comparison":[98],"mixed.":[101]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
