{"id":"https://openalex.org/W7128741903","doi":"https://doi.org/10.48550/arxiv.2602.10735","title":"Calliope: A TTS-based Narrated E-book Creator Ensuring Exact Synchronization, Privacy, and Layout Fidelity","display_name":"Calliope: A TTS-based Narrated E-book Creator Ensuring Exact Synchronization, Privacy, and Layout Fidelity","publication_year":2026,"publication_date":"2026-02-11","ids":{"openalex":"https://openalex.org/W7128741903","doi":"https://doi.org/10.48550/arxiv.2602.10735"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.10735","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125755533","display_name":"Hugo L. Hammer","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hammer, Hugo L.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034317141","display_name":"Vajira Thambawita","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thambawita, Vajira","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5021525405","display_name":"P. Halvorsen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Halvorsen, P\u00e5l","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5125755533"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12060","display_name":"Child Development and Digital Technology","score":0.07909999787807465,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12060","display_name":"Child Development and Digital Technology","score":0.07909999787807465,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10789","display_name":"Interactive and Immersive Displays","score":0.07280000299215317,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12720","display_name":"Multimedia Communication and Technology","score":0.06930000334978104,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.7491000294685364},{"id":"https://openalex.org/keywords/timestamp","display_name":"Timestamp","score":0.6328999996185303},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5552999973297119},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.49559998512268066},{"id":"https://openalex.org/keywords/reading","display_name":"Reading (process)","score":0.4894999861717224},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.42250001430511475},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4081000089645386},{"id":"https://openalex.org/keywords/synchronizing","display_name":"Synchronizing","score":0.39489999413490295},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.38269999623298645},{"id":"https://openalex.org/keywords/narrative","display_name":"Narrative","score":0.3804999887943268}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8083999752998352},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.7491000294685364},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.6328999996185303},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5552999973297119},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.49559998512268066},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.4894999861717224},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.42250001430511475},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4081000089645386},{"id":"https://openalex.org/C162932704","wikidata":"https://www.wikidata.org/wiki/Q1058791","display_name":"Synchronizing","level":3,"score":0.39489999413490295},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.38269999623298645},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.3804999887943268},{"id":"https://openalex.org/C40305131","wikidata":"https://www.wikidata.org/wiki/Q2616305","display_name":"Obfuscation","level":2,"score":0.3741999864578247},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.3504999876022339},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.34869998693466187},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.34689998626708984},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.34549999237060547},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.34049999713897705},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.3296000063419342},{"id":"https://openalex.org/C3913047","wikidata":"https://www.wikidata.org/wiki/Q1956265","display_name":"sync","level":3,"score":0.325300008058548},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.32519999146461487},{"id":"https://openalex.org/C2779019669","wikidata":"https://www.wikidata.org/wiki/Q25203946","display_name":"Asynchrony (computer programming)","level":3,"score":0.3237999975681305},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.3221000134944916},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3190999925136566},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.3167000114917755},{"id":"https://openalex.org/C67212190","wikidata":"https://www.wikidata.org/wiki/Q104851","display_name":"Firmware","level":2,"score":0.30649998784065247},{"id":"https://openalex.org/C2779623668","wikidata":"https://www.wikidata.org/wiki/Q7652842","display_name":"SwIPe","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.2818000018596649},{"id":"https://openalex.org/C17632256","wikidata":"https://www.wikidata.org/wiki/Q1076968","display_name":"Digital media","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C2780922921","wikidata":"https://www.wikidata.org/wiki/Q255189","display_name":"Paraphrase","level":2,"score":0.2728999853134155},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.25850000977516174},{"id":"https://openalex.org/C2983335612","wikidata":"https://www.wikidata.org/wiki/Q54277","display_name":"Word processing","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.25369998812675476},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.10735","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.10735","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.10735","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.10735","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.9011330604553223}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0,184],"narrated":[1,67,106],"e-book":[2,103,107],"combines":[3],"synchronized":[4],"audio":[5,121,221],"with":[6,26,169,198],"digital":[7],"text,":[8],"highlighting":[9,224],"the":[10,42,109,136,148,176,199,220,229],"currently":[11,73,174],"spoken":[12],"word":[13],"or":[14],"sentence":[15],"during":[16,126],"playback.":[17],"This":[18,153],"format":[19],"supports":[20,175],"early":[21],"literacy":[22],"and":[23,39,133,141,147,163,182,194,222,234],"assists":[24],"individuals":[25],"reading":[27,38,230],"challenges,":[28],"while":[29,205],"also":[30],"allowing":[31],"general":[32],"readers":[33],"to":[34,56,75,89,99,227],"seamlessly":[35],"switch":[36],"between":[37,131,219],"listening.":[40],"With":[41],"emergence":[43],"of":[44],"natural-sounding":[45],"neural":[46],"Text-to-Speech":[47],"(TTS)":[48],"technology,":[49],"several":[50,118],"commercial":[51],"services":[52],"have":[53],"been":[54],"developed":[55],"leverage":[57],"these":[58],"technology":[59],"for":[60],"converting":[61],"standard":[62],"text":[63,102,134,200,223],"e-books":[64],"into":[65,104],"high-quality":[66],"e-books.":[68],"However,":[69,204],"no":[70],"open-source":[71,86,97,178],"solutions":[72],"exist":[74],"perform":[76],"this":[77,80,91],"task.":[78],"In":[79],"paper,":[81],"we":[82],"present":[83],"Calliope,":[84],"an":[85],"framework":[87,173],"designed":[88],"fill":[90],"gap.":[92],"Our":[93],"method":[94,116,207],"leverages":[95],"state-of-the-art":[96,177],"TTS":[98,179,193],"convert":[100],"a":[101,105],"in":[108],"EPUB":[110],"3":[111],"Media":[112],"Overlay":[113],"format.":[114],"The":[115,172],"offers":[117],"innovative":[119],"steps:":[120],"timestamps":[122],"are":[123,144,237],"captured":[124],"directly":[125],"TTS,":[127],"ensuring":[128],"exact":[129,209],"synchronization":[130],"narration":[132,191],"highlighting;":[135],"publisher's":[137],"original":[138],"typography,":[139],"styling,":[140],"embedded":[142],"media":[143],"strictly":[145],"preserved;":[146],"entire":[149],"pipeline":[150],"operates":[151],"offline.":[152],"offline":[154],"capability":[155],"eliminates":[156],"recurring":[157],"API":[158],"costs,":[159],"mitigates":[160],"privacy":[161],"concerns,":[162],"avoids":[164],"copyright":[165],"compliance":[166],"issues":[167],"associated":[168],"cloud-based":[170],"services.":[171],"systems":[180],"XTTS-v2":[181],"Chatterbox.":[183],"potential":[185],"alternative":[186],"approach":[187],"involves":[188],"first":[189],"generating":[190],"via":[192],"subsequently":[195],"synchronizing":[196],"it":[197],"using":[201],"forced":[202,215],"alignment.":[203],"our":[206,211],"ensures":[208],"synchronization,":[210],"experiments":[212],"show":[213],"that":[214],"alignment":[216],"introduces":[217],"drift":[218],"significant":[225],"enough":[226],"degrade":[228],"experience.":[231],"Source":[232],"code":[233],"usage":[235],"instructions":[236],"available":[238],"at":[239],"https://github.com/hugohammer/TTS-Narrated-Ebook-Creator.git.":[240]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-13T00:00:00"}
