{"id":"https://openalex.org/W6906657084","doi":"https://doi.org/10.18420/inf2024_37","title":"A Comparative Analysis on Machine Learning Techniques for Research Metadata: the ARDUOUS Case Study","display_name":"A Comparative Analysis on Machine Learning Techniques for Research Metadata: the ARDUOUS Case Study","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W6906657084","doi":"https://doi.org/10.18420/inf2024_37"},"language":"en","primary_location":{"id":"pmh:oai:research-information.bris.ac.uk:openaire_cris_publications/b23c5d00-19b4-4d85-b0b6-95864333c830","is_oa":false,"landing_page_url":"https://research-information.bris.ac.uk/en/publications/b23c5d00-19b4-4d85-b0b6-95864333c830","pdf_url":null,"source":{"id":"https://openalex.org/S7407055359","display_name":"Explore Bristol Research","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Yadav, D, Tonkin, E L, Stoev, T & Yordanova, K 2024, 'A Comparative Analysis on Machine Learning Techniques for Research Metadata : the ARDUOUS Case Study'.","raw_type":"info:eu-repo/semantics/publishedVersion"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.18420/inf2024_37","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yadav, Dipendra","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yadav, Dipendra","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tonkin, Emma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tonkin, Emma","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Stoev, Teodor","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stoev, Teodor","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Yordanova, Kristina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yordanova, Kristina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.31858069,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.41290000081062317,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.41290000081062317,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.15850000083446503,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.07609999924898148,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7324000000953674},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.7300000190734863},{"id":"https://openalex.org/keywords/principal-component-analysis","display_name":"Principal component analysis","score":0.6205000281333923},{"id":"https://openalex.org/keywords/silhouette","display_name":"Silhouette","score":0.5072000026702881},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.4943000078201294},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4383000135421753},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.41200000047683716},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.4000000059604645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7459999918937683},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7324000000953674},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.7300000190734863},{"id":"https://openalex.org/C27438332","wikidata":"https://www.wikidata.org/wiki/Q2873","display_name":"Principal component analysis","level":2,"score":0.6205000281333923},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5539000034332275},{"id":"https://openalex.org/C58103923","wikidata":"https://www.wikidata.org/wiki/Q2286025","display_name":"Silhouette","level":2,"score":0.5072000026702881},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4968000054359436},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.4943000078201294},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4383000135421753},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.414000004529953},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.41200000047683716},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.4000000059604645},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.39480000734329224},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.34380000829696655},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32589998841285706},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.3203999996185303},{"id":"https://openalex.org/C39235581","wikidata":"https://www.wikidata.org/wiki/Q5158434","display_name":"Conceptual clustering","level":5,"score":0.30869999527931213},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.30809998512268066},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C2778355071","wikidata":"https://www.wikidata.org/wiki/Q1933849","display_name":"Microdata (statistics)","level":4,"score":0.2603999972343445},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.2549000084400177}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:research-information.bris.ac.uk:openaire_cris_publications/b23c5d00-19b4-4d85-b0b6-95864333c830","is_oa":false,"landing_page_url":"https://research-information.bris.ac.uk/en/publications/b23c5d00-19b4-4d85-b0b6-95864333c830","pdf_url":null,"source":{"id":"https://openalex.org/S7407055359","display_name":"Explore Bristol Research","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Yadav, D, Tonkin, E L, Stoev, T & Yordanova, K 2024, 'A Comparative Analysis on Machine Learning Techniques for Research Metadata : the ARDUOUS Case Study'.","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"doi:10.18420/inf2024_37","is_oa":true,"landing_page_url":"https://doi.org/10.18420/inf2024_37","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.18420/inf2024_37","is_oa":true,"landing_page_url":"https://doi.org/10.18420/inf2024_37","pdf_url":null,"source":{"id":"https://openalex.org/S7407052918","display_name":"Gesellschaft f\u00fcr Informatik (GI)","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article-journal"},"sustainable_development_goals":[{"score":0.5583077073097229,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,55,94],"rapid":[1],"increase":[2],"in":[3,86,152,166,185],"research":[4,176],"publications":[5],"necessitates":[6],"effective":[7,128],"methods":[8],"for":[9,45,174],"organizing":[10],"and":[11,29,89,107,118,145,170],"analyzing":[12,175],"large":[13],"volumes":[14],"of":[15,23,42,52,60,68,76,182],"textual":[16,186],"data.":[17,187],"This":[18],"study":[19,95],"evaluates":[20],"various":[21],"combinations":[22],"embedding":[24],"models,":[25],"dimensionality":[26,129],"reduction":[27,130],"techniques,":[28,131],"clustering":[30],"algorithms":[31,140],"applied":[32],"to":[33],"metadata":[34],"from":[35],"papers":[36,169],"accepted":[37],"at":[38],"the":[39,87,98,126,162,167,180],"ARDUOUS":[40],"(Annotation":[41],"useR":[43],"Data":[44],"UbiquitOUs":[46],"Systems)":[47],"workshop":[48,168],"over":[49],"a":[50],"period":[51],"7":[53],"years.":[54],"analysis":[56],"encompasses":[57],"different":[58],"types":[59],"keywords,":[61],"including":[62],"All":[63],"Keywords":[64,73,82,91],"(a":[65],"comprehensive":[66],"set":[67],"all":[69,111],"extracted":[70],"keywords),":[71],"Multi-word":[72],"(phrases":[74],"consisting":[75],"two":[77],"or":[78],"more":[79],"words),":[80],"Existing":[81],"(keywords":[83],"already":[84],"present":[85],"metadata),":[88],"Single-word":[90],"(individual":[92],"words).":[93],"found":[96],"that":[97],"highest":[99],"silhouette":[100],"scores":[101],"were":[102,123],"achieved":[103],"with":[104],"3,":[105],"4,":[106],"5":[108],"clusters":[109],"across":[110],"keyword":[112],"types.":[113],"Principal":[114],"Component":[115,120],"Analysis":[116,121],"(PCA)":[117],"Independent":[119],"(ICA)":[122],"identified":[124],"as":[125,142],"most":[127],"while":[132],"DistilBERT":[133],"embeddings":[134],"consistently":[135],"yielded":[136],"high":[137],"scores.":[138],"Clustering":[139],"such":[141],"k-means,":[143],"k-medoids,":[144],"Gaussian":[146],"Mixture":[147],"Models":[148],"(GMM)":[149],"demonstrated":[150],"robustness":[151],"forming":[153],"well-defined":[154],"clusters.":[155],"These":[156],"findings":[157],"provide":[158],"valuable":[159],"insights":[160],"into":[161],"main":[163],"topics":[164],"covered":[165],"suggest":[171],"optimal":[172],"methodologies":[173],"metadata,":[177],"thereby":[178],"enhancing":[179],"understanding":[181],"semantic":[183],"relationships":[184]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
