{"id":"https://openalex.org/W4400075043","doi":"https://doi.org/10.3390/bdcc8070072","title":"Semantic Non-Negative Matrix Factorization for Term Extraction","display_name":"Semantic Non-Negative Matrix Factorization for Term Extraction","publication_year":2024,"publication_date":"2024-06-27","ids":{"openalex":"https://openalex.org/W4400075043","doi":"https://doi.org/10.3390/bdcc8070072"},"language":"en","primary_location":{"id":"doi:10.3390/bdcc8070072","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc8070072","pdf_url":"https://www.mdpi.com/2504-2289/8/7/72/pdf?version=1719498207","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-2289/8/7/72/pdf?version=1719498207","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088617921","display_name":"Aliya Nugumanova","orcid":"https://orcid.org/0000-0001-5522-4421"},"institutions":[{"id":"https://openalex.org/I4210141757","display_name":"Astana Medical University","ror":"https://ror.org/038mavt60","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210141757"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Aliya Nugumanova","raw_affiliation_strings":["Big Data and Blockchain Technologies Research Innovation Center, Astana IT University, Astana 010000, Kazakhstan"],"affiliations":[{"raw_affiliation_string":"Big Data and Blockchain Technologies Research Innovation Center, Astana IT University, Astana 010000, Kazakhstan","institution_ids":["https://openalex.org/I4210141757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092405032","display_name":"Almas Alzhanov","orcid":"https://orcid.org/0009-0007-8083-2366"},"institutions":[{"id":"https://openalex.org/I4210141757","display_name":"Astana Medical University","ror":"https://ror.org/038mavt60","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210141757"]}],"countries":["KZ"],"is_corresponding":true,"raw_author_name":"Almas Alzhanov","raw_affiliation_strings":["Big Data and Blockchain Technologies Research Innovation Center, Astana IT University, Astana 010000, Kazakhstan"],"affiliations":[{"raw_affiliation_string":"Big Data and Blockchain Technologies Research Innovation Center, Astana IT University, Astana 010000, Kazakhstan","institution_ids":["https://openalex.org/I4210141757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011115168","display_name":"Aiganym Mansurova","orcid":"https://orcid.org/0009-0007-9076-0722"},"institutions":[{"id":"https://openalex.org/I4210141757","display_name":"Astana Medical University","ror":"https://ror.org/038mavt60","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210141757"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Aiganym Mansurova","raw_affiliation_strings":["Big Data and Blockchain Technologies Research Innovation Center, Astana IT University, Astana 010000, Kazakhstan"],"affiliations":[{"raw_affiliation_string":"Big Data and Blockchain Technologies Research Innovation Center, Astana IT University, Astana 010000, Kazakhstan","institution_ids":["https://openalex.org/I4210141757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099585382","display_name":"Kamilla Rakhymbek","orcid":null},"institutions":[{"id":"https://openalex.org/I4210096915","display_name":"Sarsen Amanzholov East Kazakhstan University","ror":"https://ror.org/00vj0q898","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210096915"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Kamilla Rakhymbek","raw_affiliation_strings":["Laboratory of Digital Technologies and Modeling, Sarsen Amanzholov East Kazakhstan University, Ust-Kamenogorsk 070000, Kazakhstan"],"affiliations":[{"raw_affiliation_string":"Laboratory of Digital Technologies and Modeling, Sarsen Amanzholov East Kazakhstan University, Ust-Kamenogorsk 070000, Kazakhstan","institution_ids":["https://openalex.org/I4210096915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076371700","display_name":"Yerzhan Baiburin","orcid":"https://orcid.org/0000-0002-1583-9912"},"institutions":[{"id":"https://openalex.org/I4210096915","display_name":"Sarsen Amanzholov East Kazakhstan University","ror":"https://ror.org/00vj0q898","country_code":"KZ","type":"education","lineage":["https://openalex.org/I4210096915"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Yerzhan Baiburin","raw_affiliation_strings":["Laboratory of Digital Technologies and Modeling, Sarsen Amanzholov East Kazakhstan University, Ust-Kamenogorsk 070000, Kazakhstan"],"affiliations":[{"raw_affiliation_string":"Laboratory of Digital Technologies and Modeling, Sarsen Amanzholov East Kazakhstan University, Ust-Kamenogorsk 070000, Kazakhstan","institution_ids":["https://openalex.org/I4210096915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5092405032"],"corresponding_institution_ids":["https://openalex.org/I4210141757"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":1.441,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.84225153,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"8","issue":"7","first_page":"72","last_page":"72"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.6486949920654297},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.540682315826416},{"id":"https://openalex.org/keywords/matrix-decomposition","display_name":"Matrix decomposition","score":0.531484842300415},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.4965136647224426},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.4312068521976471},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.42622315883636475},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.42533552646636963},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4031760096549988},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.19352442026138306},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18616989254951477},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.15913280844688416},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.07906478643417358},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.07797655463218689}],"concepts":[{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.6486949920654297},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.540682315826416},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.531484842300415},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.4965136647224426},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.4312068521976471},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.42622315883636475},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42533552646636963},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4031760096549988},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.19352442026138306},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18616989254951477},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.15913280844688416},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.07906478643417358},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.07797655463218689},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/bdcc8070072","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc8070072","pdf_url":"https://www.mdpi.com/2504-2289/8/7/72/pdf?version=1719498207","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:5e4e2ae3a0ee4ac98605c1b7d6ad30fb","is_oa":true,"landing_page_url":"https://doaj.org/article/5e4e2ae3a0ee4ac98605c1b7d6ad30fb","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Big Data and Cognitive Computing, Vol 8, Iss 7, p 72 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/bdcc8070072","is_oa":true,"landing_page_url":"https://doi.org/10.3390/bdcc8070072","pdf_url":"https://www.mdpi.com/2504-2289/8/7/72/pdf?version=1719498207","source":{"id":"https://openalex.org/S4210238752","display_name":"Big Data and Cognitive Computing","issn_l":"2504-2289","issn":["2504-2289"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data and Cognitive Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4400075043.pdf"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W148549881","https://openalex.org/W2002519017","https://openalex.org/W2067931421","https://openalex.org/W2108919995","https://openalex.org/W2135247417","https://openalex.org/W2146913572","https://openalex.org/W2395783183","https://openalex.org/W2493916176","https://openalex.org/W2769771466","https://openalex.org/W2996390114","https://openalex.org/W3008534764","https://openalex.org/W3130402426","https://openalex.org/W3157031730","https://openalex.org/W3161555785","https://openalex.org/W3176491997","https://openalex.org/W3193658714","https://openalex.org/W4213009331","https://openalex.org/W4224239755","https://openalex.org/W4224302157","https://openalex.org/W4225120353","https://openalex.org/W4297808089","https://openalex.org/W4312802177","https://openalex.org/W4385573066","https://openalex.org/W4386464638","https://openalex.org/W4389009544","https://openalex.org/W6601217708","https://openalex.org/W6676432025","https://openalex.org/W6680012447","https://openalex.org/W6843995235"],"related_works":["https://openalex.org/W2794559785","https://openalex.org/W1754499339","https://openalex.org/W2013873776","https://openalex.org/W42295635","https://openalex.org/W1973996291","https://openalex.org/W2950281908","https://openalex.org/W2963117165","https://openalex.org/W2084977674","https://openalex.org/W1973739845","https://openalex.org/W119752240"],"abstract_inverted_index":{"This":[0],"study":[1],"introduces":[2],"an":[3],"unsupervised":[4],"term":[5,43,149,166,220],"extraction":[6,150,221],"approach":[7],"that":[8,24,67,106,122,171],"combines":[9],"non-negative":[10],"matrix":[11,33,48,95],"factorization":[12],"(NMF)":[13],"with":[14,96],"word":[15,60],"embeddings.":[16,61],"Inspired":[17],"by":[18],"a":[19,46,75,97,118],"pioneering":[20],"semantic":[21,50,110],"NMF":[22,120,177],"method":[23,173],"employs":[25],"regularization":[26],"to":[27,72,79,102,140],"jointly":[28,123],"optimize":[29],"document\u2013word":[30,126],"and":[31,127,178,185,191,195,208,210,222,229],"word\u2013word":[32,47,94],"factorizations":[34],"for":[35,42,135,162,218],"document":[36,223],"clustering,":[37],"we":[38,90,116,138,193],"adapt":[39],"this":[40,88],"strategy":[41],"extraction.":[44,167],"Typically,":[45],"representing":[49],"relationships":[51],"between":[52,59,84],"words":[53,105],"is":[54],"constructed":[55],"using":[56],"cosine":[57,82],"similarities":[58,83],"However,":[62],"it":[63],"has":[64],"been":[65],"established":[66],"transformer":[68],"encoder":[69],"embeddings":[70],"tend":[71],"reside":[73],"within":[74],"narrow":[76],"cone,":[77],"leading":[78],"consistently":[80],"high":[81],"words.":[85],"To":[86,187],"address":[87],"issue,":[89],"replace":[91],"the":[92,108,113,125,153,163,206],"conventional":[93],"word\u2013seed":[98,128],"submatrix,":[99],"restricting":[100],"columns":[101],"\u2018domain":[103],"seeds\u2019\u2014specific":[104],"encapsulate":[107],"essential":[109],"features":[111],"of":[112,156,165],"domain.":[114],"Therefore,":[115],"propose":[117],"modified":[119],"framework":[121],"factorizes":[124],"matrices,":[129],"producing":[130],"more":[131],"precise":[132],"encoding":[133],"vectors":[134],"words,":[136],"which":[137],"utilize":[139],"extract":[141],"high-relevancy":[142],"topic-related":[143],"terms.":[144],"Our":[145],"modification":[146],"significantly":[147],"improves":[148],"effectiveness,":[151],"marking":[152],"first":[154],"implementation":[155],"semantically":[157],"enhanced":[158],"NMF,":[159],"designed":[160],"specifically":[161],"task":[164],"Comparative":[168],"experiments":[169],"demonstrate":[170],"our":[172],"outperforms":[174],"both":[175,219],"traditional":[176],"advanced":[179],"transformer-based":[180],"methods":[181],"such":[182],"as":[183],"KeyBERT":[184],"BERTopic.":[186],"support":[188],"further":[189],"research":[190],"application,":[192],"compile":[194],"manually":[196],"annotate":[197],"two":[198],"new":[199],"datasets,":[200],"each":[201],"containing":[202],"1000":[203],"sentences,":[204],"from":[205],"\u2018Geography":[207],"History\u2019":[209],"\u2018National":[211],"Heroes\u2019":[212],"domains.":[213],"These":[214],"datasets":[215,230],"are":[216,231],"useful":[217],"classification":[224],"tasks.":[225],"All":[226],"related":[227],"code":[228],"freely":[232],"available.":[233]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-10-10T00:00:00"}
