{"id":"https://openalex.org/W7123471160","doi":"https://doi.org/10.48550/arxiv.2601.07314","title":"Mitrasamgraha: A Comprehensive Classical Sanskrit Machine Translation Dataset","display_name":"Mitrasamgraha: A Comprehensive Classical Sanskrit Machine Translation Dataset","publication_year":2026,"publication_date":"2026-01-12","ids":{"openalex":"https://openalex.org/W7123471160","doi":"https://doi.org/10.48550/arxiv.2601.07314"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.07314","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.07314","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.07314","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034724523","display_name":"Sebastian Nehrdich","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nehrdich, Sebastian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038985588","display_name":"David Allport","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Allport, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077958303","display_name":"Sven Sellmer","orcid":"https://orcid.org/0000-0002-6688-0667"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sellmer, Sven","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071913667","display_name":"Jivnesh Sandhan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sandhan, Jivnesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062649893","display_name":"Manoj Balaji Jagadeeshan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jagadeeshan, Manoj Balaji","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122912322","display_name":"Pawan Goyal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goyal, Pawan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122982210","display_name":"Sujeet Kumar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Sujeet","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Keutzer, Kurt","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Keutzer, Kurt","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5034724523"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.7056000232696533,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.7056000232696533,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.044199999421834946,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.03920000046491623,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sanskrit","display_name":"Sanskrit","score":0.7508000135421753},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.6032000184059143},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5392000079154968},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4943999946117401},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4343999922275543},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.40779998898506165},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.39980000257492065}],"concepts":[{"id":"https://openalex.org/C29912816","wikidata":"https://www.wikidata.org/wiki/Q11059","display_name":"Sanskrit","level":2,"score":0.7508000135421753},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6881999969482422},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6333000063896179},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.6032000184059143},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6028000116348267},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5392000079154968},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4943999946117401},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4343999922275543},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.40779998898506165},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.39980000257492065},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.39590001106262207},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.3393999934196472},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C2781291010","wikidata":"https://www.wikidata.org/wiki/Q178580","display_name":"Period (music)","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C24687705","wikidata":"https://www.wikidata.org/wiki/Q3753284","display_name":"Example-based machine translation","level":3,"score":0.2660999894142151},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.25540000200271606},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.07314","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.07314","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.07314","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.07314","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6164774298667908,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"machine":[1,135],"translation":[2,136,248],"is":[3,19,41,110],"regarded":[4],"as":[5,29,47,82,84],"a":[6,42,50,85,111,132,158,167,202,209],"\"solved":[7],"problem\"":[8],"for":[9,23],"many":[10],"high-resource":[11],"languages,":[12],"close":[13],"analysis":[14],"quickly":[15],"reveals":[16],"that":[17,25,118],"this":[18,185,227,264],"not":[20],"the":[21,149,179,236,247,267],"case":[22],"content":[24],"shows":[26],"challenges":[27,55,245],"such":[28,54],"poetic":[30,100],"language,":[31],"philosophical":[32,98,252],"concepts,":[33,253],"multi-layered":[34,255],"metaphorical":[35],"expressions,":[36],"and":[37,65,123,166,181,192,208,223,229,232,254],"more.":[38],"Sanskrit":[39,153,172],"literature":[40],"prime":[43],"example":[44],"of":[45,53,78,88,107,114,126,139,161,170,184,190,206,213,249,269],"this,":[46],"it":[48],"combines":[49],"large":[51,86],"number":[52],"in":[56,246],"addition":[57],"to":[58,103,176],"inherent":[59],"linguistic":[60],"features":[61],"like":[62],"sandhi,":[63],"compounding,":[64],"heavy":[66],"morphology,":[67],"which":[68],"further":[69],"complicate":[70],"NLP":[71],"downstream":[72],"tasks.":[73],"It":[74,156],"spans":[75],"multiple":[76],"millennia":[77,165],"text":[79],"production":[80],"time":[81,159,193],"well":[83],"breadth":[87],"different":[89,121],"domains,":[90],"ranging":[91],"from":[92],"ritual":[93],"formulas":[94],"via":[95],"epic":[96],"narratives,":[97],"treatises,":[99],"verses":[101],"up":[102],"scientific":[104],"material.":[105],"As":[106],"now,":[108],"there":[109],"strong":[112],"lack":[113],"publicly":[115],"available":[116,152],"resources":[117],"cover":[119],"these":[120],"domains":[122],"temporal":[124,180],"layers":[125],"Sanskrit.":[127],"We":[128,199,218,257],"therefore":[129],"introduce":[130],"Mitrasamgraha,":[131],"high-quality":[133],"Sanskrit-to-English":[134],"dataset":[137,154,186,228,265],"consisting":[138,205,212],"391,548":[140],"bitext":[141,216],"pairs,":[142],"more":[143,162],"than":[144,148,163],"four":[145],"times":[146],"larger":[147],"largest":[150],"previously":[151],"Itih=asa.":[155],"covers":[157],"period":[160,194],"three":[164],"broad":[168],"range":[169],"historical":[171],"domains.":[173],"In":[174],"contrast":[175],"web-crawled":[177],"datasets,":[178],"domain":[182,191],"annotation":[183],"enables":[187],"fine-grained":[188],"study":[189],"effects":[195],"on":[196,226,235,263],"MT":[197],"performance.":[198],"also":[200,258],"release":[201],"validation":[203],"set":[204,211],"5,587":[207],"test":[210],"5,552":[214],"post-corrected":[215],"pairs.":[217],"conduct":[219],"experiments":[220],"benchmarking":[221],"commercial":[222,270],"open":[224],"models":[225,234,271],"fine-tune":[230],"NLLB":[231],"Gemma":[233],"dataset,":[237],"showing":[238],"significant":[239,244],"improvements,":[240],"while":[241],"still":[242],"recognizing":[243],"complex":[250],"compounds,":[251],"metaphors.":[256],"analyze":[259],"how":[260],"in-context":[261],"learning":[262],"impacts":[266],"performance":[268]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2026-01-14T00:00:00"}
