{"id":"https://openalex.org/W3086369502","doi":"https://doi.org/10.1109/inista49547.2020.9194665","title":"Document Processing: Methods for Semantic Text Similarity Analysis","display_name":"Document Processing: Methods for Semantic Text Similarity Analysis","publication_year":2020,"publication_date":"2020-08-01","ids":{"openalex":"https://openalex.org/W3086369502","doi":"https://doi.org/10.1109/inista49547.2020.9194665","mag":"3086369502"},"language":"en","primary_location":{"id":"doi:10.1109/inista49547.2020.9194665","is_oa":false,"landing_page_url":"https://doi.org/10.1109/inista49547.2020.9194665","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Conference on INnovations in Intelligent SysTems and Applications (INISTA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027339233","display_name":"Abdul Wahab Qurashi","orcid":"https://orcid.org/0000-0002-0903-3883"},"institutions":[{"id":"https://openalex.org/I133837150","display_name":"University of Huddersfield","ror":"https://ror.org/05t1h8f27","country_code":"GB","type":"education","lineage":["https://openalex.org/I133837150"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Abdul Wahab Qurashi","raw_affiliation_strings":["School of Computing and Engineering, University of Huddersfield, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing and Engineering, University of Huddersfield, United Kingdom","institution_ids":["https://openalex.org/I133837150"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005904368","display_name":"Violeta Holmes","orcid":"https://orcid.org/0000-0002-9786-4555"},"institutions":[{"id":"https://openalex.org/I133837150","display_name":"University of Huddersfield","ror":"https://ror.org/05t1h8f27","country_code":"GB","type":"education","lineage":["https://openalex.org/I133837150"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Violeta Holmes","raw_affiliation_strings":["School of Computing and Engineering, University of Huddersfield, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing and Engineering, University of Huddersfield, United Kingdom","institution_ids":["https://openalex.org/I133837150"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012947651","display_name":"Anju P. Johnson","orcid":"https://orcid.org/0000-0002-7017-1644"},"institutions":[{"id":"https://openalex.org/I133837150","display_name":"University of Huddersfield","ror":"https://ror.org/05t1h8f27","country_code":"GB","type":"education","lineage":["https://openalex.org/I133837150"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Anju P. Johnson","raw_affiliation_strings":["School of Computing and Engineering, University of Huddersfield, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing and Engineering, University of Huddersfield, United Kingdom","institution_ids":["https://openalex.org/I133837150"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.1977,"has_fulltext":false,"cited_by_count":66,"citation_normalized_percentile":{"value":0.95140954,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8156746625900269},{"id":"https://openalex.org/keywords/jaccard-index","display_name":"Jaccard index","score":0.8119666576385498},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.6858429908752441},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6474937200546265},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.5961918830871582},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5593979358673096},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.550704836845398},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4970116913318634},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4686424434185028},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4588603973388672},{"id":"https://openalex.org/keywords/semantic-equivalence","display_name":"Semantic equivalence","score":0.4519025683403015},{"id":"https://openalex.org/keywords/equivalence","display_name":"Equivalence (formal languages)","score":0.43254226446151733},{"id":"https://openalex.org/keywords/similarity-measure","display_name":"Similarity measure","score":0.41179585456848145},{"id":"https://openalex.org/keywords/semantic-computing","display_name":"Semantic computing","score":0.3623720705509186},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.14429432153701782},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.11475980281829834},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.104888916015625},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07845082879066467}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8156746625900269},{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.8119666576385498},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.6858429908752441},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6474937200546265},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.5961918830871582},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5593979358673096},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.550704836845398},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4970116913318634},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4686424434185028},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4588603973388672},{"id":"https://openalex.org/C37926939","wikidata":"https://www.wikidata.org/wiki/Q7449061","display_name":"Semantic equivalence","level":4,"score":0.4519025683403015},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.43254226446151733},{"id":"https://openalex.org/C2776517306","wikidata":"https://www.wikidata.org/wiki/Q29017317","display_name":"Similarity measure","level":2,"score":0.41179585456848145},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.3623720705509186},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.14429432153701782},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.11475980281829834},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.104888916015625},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07845082879066467},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/inista49547.2020.9194665","is_oa":false,"landing_page_url":"https://doi.org/10.1109/inista49547.2020.9194665","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Conference on INnovations in Intelligent SysTems and Applications (INISTA)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/a498ce04-e230-48ca-8b04-86ee69fa9e38","is_oa":false,"landing_page_url":"https://pure.hud.ac.uk/en/publications/a498ce04-e230-48ca-8b04-86ee69fa9e38","pdf_url":null,"source":{"id":"https://openalex.org/S4306402508","display_name":"Huddersfield Research Portal (University of Huddersfield)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I133837150","host_organization_name":"University of Huddersfield","host_organization_lineage":["https://openalex.org/I133837150"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Qurashi, A W, Holmes, V & Johnson, A P 2020, Document Processing : Methods for Semantic Text Similarity Analysis. in M Ivanovic, T Yildirim, G Trajcevski, C Badica, L Bellatreche, I Kotenko, A Badica, B Erkmen & M Savic (eds), 2020 International Conference on INnovations in Intelligent SysTems and Applications, Proceedings : INISTA 2020., 9194665, Institute of Electrical and Electronics Engineers Inc., 2020 International Conference on INnovations in Intelligent SysTems and Applications, Novi Sad, Serbia, 24/08/20. https://doi.org/10.1109/INISTA49547.2020.9194665","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7099999785423279,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W839185196","https://openalex.org/W1521626219","https://openalex.org/W1532325895","https://openalex.org/W1566018662","https://openalex.org/W1604224286","https://openalex.org/W2131571251","https://openalex.org/W2307895033","https://openalex.org/W2608106774","https://openalex.org/W2785047343","https://openalex.org/W2789354743","https://openalex.org/W2951290378","https://openalex.org/W4213009331","https://openalex.org/W4294170691","https://openalex.org/W6633661181","https://openalex.org/W6682691769","https://openalex.org/W6736348905","https://openalex.org/W6747940797"],"related_works":["https://openalex.org/W4381948805","https://openalex.org/W4214483597","https://openalex.org/W4220978606","https://openalex.org/W4220894110","https://openalex.org/W4286850906","https://openalex.org/W4321843578","https://openalex.org/W1437580529","https://openalex.org/W4313532769","https://openalex.org/W2307895033","https://openalex.org/W3215923396"],"abstract_inverted_index":{"The":[0,35,93],"document":[1],"text":[2,26],"similarity":[3,27,89,112,122],"measurement":[4,123],"and":[5,54,68,75,85,87,108,110,121],"analysis":[6],"is":[7,41,98],"a":[8],"growing":[9],"application":[10],"of":[11,20,38,46,49,80,104,113],"Natural":[12,81,118],"Language":[13,82],"Processing.":[14],"This":[15],"paper":[16],"presents":[17],"the":[18,44,58,78,102],"results":[19,94],"using":[21,117],"different":[22,69],"techniques":[23],"for":[24,32,52],"semantic":[25,47],"measurements":[28],"in":[29,57],"documents":[30,59,116],"used":[31],"safety-critical":[33,115],"systems.":[34],"research":[36],"objective":[37],"this":[39],"work":[40],"to":[42,72,100],"measure":[43,111],"degree":[45],"equivalence":[48],"multi-word":[50],"sentences":[51],"rules":[53,107],"procedures":[55,109],"contained":[56],"on":[60],"railway":[61],"safety.":[62],"These":[63],"documents,":[64],"with":[65],"unstructured":[66],"data":[67],"formats,":[70],"need":[71],"be":[73],"preprocessed":[74],"cleaned":[76],"before":[77],"set":[79],"Processing":[83],"toolkits,":[84],"Jaccard":[86],"Cosine":[88],"metrics":[90],"are":[91],"applied.":[92],"demonstrate":[95],"that":[96],"it":[97],"feasible":[99],"automate":[101],"process":[103],"identifying":[105],"equivalent":[106],"disparate":[114],"language":[119],"processing":[120],"techniques.":[124]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":14},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":15},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
