{"id":"https://openalex.org/W4415524244","doi":"https://doi.org/10.1109/mlsp62443.2025.11204203","title":"Semantic Chunking and Chain-Of-Thought Reasoning for Rag-Based Document Processing","display_name":"Semantic Chunking and Chain-Of-Thought Reasoning for Rag-Based Document Processing","publication_year":2025,"publication_date":"2025-08-31","ids":{"openalex":"https://openalex.org/W4415524244","doi":"https://doi.org/10.1109/mlsp62443.2025.11204203"},"language":null,"primary_location":{"id":"doi:10.1109/mlsp62443.2025.11204203","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp62443.2025.11204203","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 35th International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053589323","display_name":"Yavuz Ate\u015f","orcid":"https://orcid.org/0000-0002-4168-0861"},"institutions":[{"id":"https://openalex.org/I2738502077","display_name":"Nokia (Finland)","ror":"https://ror.org/04pkc8m17","country_code":"FI","type":"company","lineage":["https://openalex.org/I2738502077"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Yi\u011fit Ate\u015f","raw_affiliation_strings":["TAM Finans R&#x0026;D,Istanbul,T&#x00FC;rkiye"],"affiliations":[{"raw_affiliation_string":"TAM Finans R&#x0026;D,Istanbul,T&#x00FC;rkiye","institution_ids":["https://openalex.org/I2738502077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025739302","display_name":"Alperen Sayar","orcid":"https://orcid.org/0000-0001-6089-2547"},"institutions":[{"id":"https://openalex.org/I2738502077","display_name":"Nokia (Finland)","ror":"https://ror.org/04pkc8m17","country_code":"FI","type":"company","lineage":["https://openalex.org/I2738502077"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Alperen Sayar","raw_affiliation_strings":["TAM Finans R&#x0026;D,Istanbul,T&#x00FC;rkiye"],"affiliations":[{"raw_affiliation_string":"TAM Finans R&#x0026;D,Istanbul,T&#x00FC;rkiye","institution_ids":["https://openalex.org/I2738502077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115842483","display_name":"\u0130brahim Umut Bozlar","orcid":null},"institutions":[{"id":"https://openalex.org/I2738502077","display_name":"Nokia (Finland)","ror":"https://ror.org/04pkc8m17","country_code":"FI","type":"company","lineage":["https://openalex.org/I2738502077"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"\u0130brahim Umut Bozlar","raw_affiliation_strings":["TAM Finans R&#x0026;D,Istanbul,T&#x00FC;rkiye"],"affiliations":[{"raw_affiliation_string":"TAM Finans R&#x0026;D,Istanbul,T&#x00FC;rkiye","institution_ids":["https://openalex.org/I2738502077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056236010","display_name":"Seyit Ertu\u011frul","orcid":"https://orcid.org/0000-0003-0828-7336"},"institutions":[{"id":"https://openalex.org/I2738502077","display_name":"Nokia (Finland)","ror":"https://ror.org/04pkc8m17","country_code":"FI","type":"company","lineage":["https://openalex.org/I2738502077"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Seyit Ertu\u011frul","raw_affiliation_strings":["TAM Finans R&#x0026;D,Istanbul,T&#x00FC;rkiye"],"affiliations":[{"raw_affiliation_string":"TAM Finans R&#x0026;D,Istanbul,T&#x00FC;rkiye","institution_ids":["https://openalex.org/I2738502077"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039387116","display_name":"\u015euayb S. Arslan","orcid":"https://orcid.org/0000-0003-3779-0731"},"institutions":[{"id":"https://openalex.org/I4210135667","display_name":"Xian Mechanical & Electric Institute (China)","ror":"https://ror.org/04d9wma96","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210135667"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Suayb S. Arslan","raw_affiliation_strings":["Bo&#x011F;azi&#x00E7;i University,Department of Computer Engineering and Institute for DSAI"],"affiliations":[{"raw_affiliation_string":"Bo&#x011F;azi&#x00E7;i University,Department of Computer Engineering and Institute for DSAI","institution_ids":["https://openalex.org/I4210135667"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5053589323"],"corresponding_institution_ids":["https://openalex.org/I2738502077"],"apc_list":null,"apc_paid":null,"fwci":5.6662,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.96129715,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9624000191688538,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9624000191688538,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.7488999962806702},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.5630999803543091},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.43849998712539673},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.42719998955726624},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4147999882698059},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4025999903678894},{"id":"https://openalex.org/keywords/semantic-matching","display_name":"Semantic matching","score":0.38670000433921814},{"id":"https://openalex.org/keywords/semantic-memory","display_name":"Semantic memory","score":0.3407000005245209},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.34040001034736633}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8406999707221985},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.7488999962806702},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6496999859809875},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6121000051498413},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.5630999803543091},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.43849998712539673},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.42719998955726624},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4147999882698059},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4025999903678894},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.38670000433921814},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3617999851703644},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34369999170303345},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.3407000005245209},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.34040001034736633},{"id":"https://openalex.org/C67277372","wikidata":"https://www.wikidata.org/wiki/Q7449085","display_name":"Semantic role labeling","level":3,"score":0.3255000114440918},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.3172999918460846},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.303600013256073},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.2824999988079071},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C2779500292","wikidata":"https://www.wikidata.org/wiki/Q14802672","display_name":"Text processing","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C95318506","wikidata":"https://www.wikidata.org/wiki/Q6588467","display_name":"Textual entailment","level":3,"score":0.25450000166893005},{"id":"https://openalex.org/C170133592","wikidata":"https://www.wikidata.org/wiki/Q1806883","display_name":"Latent semantic analysis","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mlsp62443.2025.11204203","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp62443.2025.11204203","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE 35th International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W2740030006","https://openalex.org/W4252076394","https://openalex.org/W4285132953","https://openalex.org/W4402670290","https://openalex.org/W4408147114","https://openalex.org/W4409403872"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3],"novel":[4],"two-phase":[5],"semantic":[6,64,119],"chunking":[7,106],"methodology":[8],"designed":[9],"to":[10,32,104],"enhance":[11],"document":[12,37],"processing":[13,120],"within":[14],"Retrieval-Augmented":[15],"Generation":[16],"(RAG)":[17],"systems.":[18,134],"The":[19,108],"proposed":[20,70],"approach":[21,92],"utilizes":[22,110],"Large":[23],"Language":[24],"Models":[25],"(LLMs)":[26],"and":[27,35,49,76,99],"Chain":[28],"of":[29,67,78],"Thought":[30],"(CoT)":[31],"systematically":[33],"generate":[34],"refine":[36],"chunks,":[38],"while":[39],"concurrently":[40],"producing":[41],"associated":[42],"metadata,":[43],"such":[44],"as":[45],"hypothetical":[46],"user":[47],"queries":[48],"contextual":[50,100],"tags.":[51],"By":[52],"integrating":[53],"established":[54],"information":[55],"retrieval":[56],"techniques-namely":[57],"Best":[58],"Matching":[59],"25":[60],"(BM25)-with":[61],"the":[62,69,74,111],"advanced":[63],"understanding":[65],"capabilities":[66],"LLMs,":[68],"method":[71],"substantially":[72],"improves":[73],"relevance":[75,101],"quality":[77],"retrieved":[79],"context":[80],"for":[81,117],"Generative":[82],"Artificial":[83],"Intelligence":[84],"(GenAI)":[85],"applications.":[86],"Empirical":[87],"evaluations":[88],"reveal":[89],"that":[90],"this":[91],"yields":[93],"significant":[94],"improvements":[95],"in":[96,131],"response":[97],"accuracy":[98],"when":[102],"compared":[103],"traditional":[105],"techniques.":[107],"implementation":[109],"open-source":[112],"Qwen":[113],"2.572":[114],"B":[115],"model":[116],"its":[118],"operations,":[121],"demonstrating":[122],"how":[123],"state-of-the-art":[124],"language":[125],"models":[126],"can":[127],"be":[128],"effectively":[129],"deployed":[130],"practical":[132],"RAG":[133]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
