{"id":"https://openalex.org/W4411352972","doi":"https://doi.org/10.3390/computation13060151","title":"English-Arabic Hybrid Semantic Text Chunking Based on Fine-Tuning BERT","display_name":"English-Arabic Hybrid Semantic Text Chunking Based on Fine-Tuning BERT","publication_year":2025,"publication_date":"2025-06-16","ids":{"openalex":"https://openalex.org/W4411352972","doi":"https://doi.org/10.3390/computation13060151"},"language":"en","primary_location":{"id":"doi:10.3390/computation13060151","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computation13060151","pdf_url":"https://www.mdpi.com/2079-3197/13/6/151/pdf?version=1750063742","source":{"id":"https://openalex.org/S2738402919","display_name":"Computation","issn_l":"2079-3197","issn":["2079-3197"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computation","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2079-3197/13/6/151/pdf?version=1750063742","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Mai Alammar","orcid":"https://orcid.org/0009-0005-3855-5209"},"institutions":[{"id":"https://openalex.org/I240666556","display_name":"Imam Mohammad ibn Saud Islamic University","ror":"https://ror.org/05gxjyb39","country_code":"SA","type":"education","lineage":["https://openalex.org/I240666556"]},{"id":"https://openalex.org/I28022161","display_name":"King Saud University","ror":"https://ror.org/02f81g417","country_code":"SA","type":"education","lineage":["https://openalex.org/I28022161"]}],"countries":["SA"],"is_corresponding":true,"raw_author_name":"Mai Alammar","raw_affiliation_strings":["Department of Computer Science, College of Computer and Information Science, Imam Mohammad Ibn Saud Islamic University (IMSIU), Riyadh 11564, Saudi Arabia","Department of Computer Science, College of Computer and Information Sciences, King Saud University, Riyadh 11451, Saudi Arabia"],"raw_orcid":"https://orcid.org/0009-0005-3855-5209","affiliations":[{"raw_affiliation_string":"Department of Computer Science, College of Computer and Information Science, Imam Mohammad Ibn Saud Islamic University (IMSIU), Riyadh 11564, Saudi Arabia","institution_ids":["https://openalex.org/I240666556"]},{"raw_affiliation_string":"Department of Computer Science, College of Computer and Information Sciences, King Saud University, Riyadh 11451, Saudi Arabia","institution_ids":["https://openalex.org/I28022161"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079481884","display_name":"Khalil El Hindi","orcid":"https://orcid.org/0000-0003-2457-9961"},"institutions":[{"id":"https://openalex.org/I28022161","display_name":"King Saud University","ror":"https://ror.org/02f81g417","country_code":"SA","type":"education","lineage":["https://openalex.org/I28022161"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Khalil El Hindi","raw_affiliation_strings":["Department of Computer Science, College of Computer and Information Sciences, King Saud University, Riyadh 11451, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0003-2457-9961","affiliations":[{"raw_affiliation_string":"Department of Computer Science, College of Computer and Information Sciences, King Saud University, Riyadh 11451, Saudi Arabia","institution_ids":["https://openalex.org/I28022161"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063710079","display_name":"Hend S. Al\u2010Khalifa","orcid":"https://orcid.org/0000-0002-7328-4935"},"institutions":[{"id":"https://openalex.org/I28022161","display_name":"King Saud University","ror":"https://ror.org/02f81g417","country_code":"SA","type":"education","lineage":["https://openalex.org/I28022161"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Hend Al-Khalifa","raw_affiliation_strings":["Department of Computer Science, College of Computer and Information Sciences, King Saud University, Riyadh 11451, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0002-7328-4935","affiliations":[{"raw_affiliation_string":"Department of Computer Science, College of Computer and Information Sciences, King Saud University, Riyadh 11451, Saudi Arabia","institution_ids":["https://openalex.org/I28022161"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I240666556","https://openalex.org/I28022161"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":6.5198,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.96210648,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"13","issue":"6","first_page":"151","last_page":"151"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.8441859483718872},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.7066091299057007},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7053380608558655},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6673551797866821},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5900301337242126},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.40447551012039185},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.07318314909934998}],"concepts":[{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.8441859483718872},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.7066091299057007},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7053380608558655},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6673551797866821},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5900301337242126},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.40447551012039185},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.07318314909934998}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/computation13060151","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computation13060151","pdf_url":"https://www.mdpi.com/2079-3197/13/6/151/pdf?version=1750063742","source":{"id":"https://openalex.org/S2738402919","display_name":"Computation","issn_l":"2079-3197","issn":["2079-3197"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computation","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:916511a37c7744ccbce19ea625e6a6db","is_oa":true,"landing_page_url":"https://doaj.org/article/916511a37c7744ccbce19ea625e6a6db","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computation, Vol 13, Iss 6, p 151 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/computation13060151","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computation13060151","pdf_url":"https://www.mdpi.com/2079-3197/13/6/151/pdf?version=1750063742","source":{"id":"https://openalex.org/S2738402919","display_name":"Computation","issn_l":"2079-3197","issn":["2079-3197"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computation","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411352972.pdf","grobid_xml":"https://content.openalex.org/works/W4411352972.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W1557074680","https://openalex.org/W1566289585","https://openalex.org/W1828401780","https://openalex.org/W1862888253","https://openalex.org/W2118612506","https://openalex.org/W2137320444","https://openalex.org/W2159083595","https://openalex.org/W2508865106","https://openalex.org/W2512217112","https://openalex.org/W2585620645","https://openalex.org/W2739351760","https://openalex.org/W2739675333","https://openalex.org/W2761764495","https://openalex.org/W2793710388","https://openalex.org/W2896457183","https://openalex.org/W2914694065","https://openalex.org/W2962716111","https://openalex.org/W2970641574","https://openalex.org/W2997244287","https://openalex.org/W3034327408","https://openalex.org/W3104033643","https://openalex.org/W3115355887","https://openalex.org/W3154836396","https://openalex.org/W3163699936","https://openalex.org/W3198035914","https://openalex.org/W3205577628","https://openalex.org/W4231510805","https://openalex.org/W4238205294","https://openalex.org/W4285204079","https://openalex.org/W4288089799","https://openalex.org/W4293194974","https://openalex.org/W4367677438","https://openalex.org/W4391969769","https://openalex.org/W4394577840","https://openalex.org/W4401042285","https://openalex.org/W4403279032","https://openalex.org/W4404783428","https://openalex.org/W6638575021","https://openalex.org/W6639619044","https://openalex.org/W6679356491","https://openalex.org/W6732759438","https://openalex.org/W6739901393","https://openalex.org/W6769627184"],"related_works":["https://openalex.org/W2384729545","https://openalex.org/W2198395236","https://openalex.org/W2800417007","https://openalex.org/W147604216","https://openalex.org/W2161080928","https://openalex.org/W4245487161","https://openalex.org/W2090755435","https://openalex.org/W2039036070","https://openalex.org/W2153813398","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Semantic":[0,20],"text":[1,6,49,59,93,122,136],"chunking":[2,21,50,60,123,137,161],"refers":[3],"to":[4,86,119,132],"segmenting":[5],"into":[7,12],"coherently":[8],"semantic":[9,48,58,81,92,121,160],"chunks,":[10],"i.e.,":[11],"sets":[13],"of":[14,56,107,114,165,175],"statements":[15],"that":[16,52,148],"are":[17],"semantically":[18],"related.":[19],"is":[22,111],"an":[23,112,115,130,141,163,173],"essential":[24],"pre-processing":[25],"step":[26],"in":[27,100,167,177,188,193],"various":[28],"NLP":[29],"tasks":[30],"e.g.,":[31],"document":[32],"summarization,":[33],"sentiment":[34],"analysis":[35],"and":[36,68,90,102,171,186,191],"question":[37],"answering.":[38],"In":[39],"this":[40,125],"paper,":[41],"we":[42,128],"propose":[43],"a":[44,88],"hybrid":[45],"chunking;":[46],"two-steps":[47],"method":[51,99,138],"combines":[53],"the":[54,63,69,78,97,105,150,168,178,189,194,197],"effectiveness":[55],"unsupervised":[57,159],"based":[61],"on":[62,80,154,181],"similarities":[64],"between":[65],"sentences":[66],"embeddings":[67],"pre-trained":[70,152],"language":[71],"models":[72],"(PLMs)":[73],"especially":[74],"BERT":[75,79,153],"by":[76,140,162,172],"fine-tuning":[77],"textual":[82],"similarity":[83],"task":[84],"(STS)":[85],"provide":[87],"flexible":[89],"effective":[91],"chunking.":[94],"We":[95],"evaluated":[96],"proposed":[98,135],"English":[101,143,183],"Arabic.":[103],"To":[104],"best":[106],"our":[108,134],"knowledge,":[109],"there":[110],"absence":[113],"Arabic":[116,198],"dataset":[117],"created":[118,129],"assess":[120],"at":[124],"level.":[126],"Therefore,":[127],"AraWiki50k":[131],"evaluate":[133],"inspired":[139],"existing":[142],"dataset.":[144,199],"Our":[145],"experiments":[146],"showed":[147],"exploiting":[149],"fine-tuned":[151],"STS":[155],"enhances":[156],"results":[157],"over":[158],"average":[164,174],"7.4":[166],"PK":[169,190],"metric":[170,180],"11.19":[176],"WindowDiff":[179,195],"four":[182],"evaluation":[184],"datasets,":[185],"0.12":[187],"2.29":[192],"for":[196]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
