{"id":"https://openalex.org/W3156404059","doi":"https://doi.org/10.1162/tacl_a_00452","title":"<i>Samanantar</i>: The Largest Publicly Available Parallel Corpora Collection for 11 Indic Languages","display_name":"<i>Samanantar</i>: The Largest Publicly Available Parallel Corpora Collection for 11 Indic Languages","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W3156404059","doi":"https://doi.org/10.1162/tacl_a_00452","mag":"3156404059"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00452","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00452","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00452/1987010/tacl_a_00452.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00452/1987010/tacl_a_00452.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102974082","display_name":"G. Ramesh","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gowtham Ramesh","raw_affiliation_strings":["RBCDSAI, India"],"affiliations":[{"raw_affiliation_string":"RBCDSAI, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078774993","display_name":"Sumanth Doddapaneni","orcid":"https://orcid.org/0000-0003-4248-646X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sumanth Doddapaneni","raw_affiliation_strings":["RBCDSAI, India"],"affiliations":[{"raw_affiliation_string":"RBCDSAI, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050265596","display_name":"Aravinth Bheemaraj","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aravinth Bheemaraj","raw_affiliation_strings":["EkStep Foundation, India","Tarento Technologies, India"],"affiliations":[{"raw_affiliation_string":"EkStep Foundation, India","institution_ids":[]},{"raw_affiliation_string":"Tarento Technologies, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030390970","display_name":"Mayank Jobanputra","orcid":"https://orcid.org/0000-0002-8802-2401"},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Mayank Jobanputra","raw_affiliation_strings":["IIT Madras, India"],"affiliations":[{"raw_affiliation_string":"IIT Madras, India","institution_ids":["https://openalex.org/I24676775"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002372252","display_name":"Raghavan AK","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raghavan AK","raw_affiliation_strings":["AI4Bharat, India"],"affiliations":[{"raw_affiliation_string":"AI4Bharat, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051361759","display_name":"Ajitesh Sharma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ajitesh Sharma","raw_affiliation_strings":["EkStep Foundation, India","Tarento Technologies, India"],"affiliations":[{"raw_affiliation_string":"EkStep Foundation, India","institution_ids":[]},{"raw_affiliation_string":"Tarento Technologies, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085163865","display_name":"Sujit Kumar Sahoo","orcid":"https://orcid.org/0000-0002-1208-9466"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sujit Sahoo","raw_affiliation_strings":["EkStep Foundation, India","Tarento Technologies, India"],"affiliations":[{"raw_affiliation_string":"EkStep Foundation, India","institution_ids":[]},{"raw_affiliation_string":"Tarento Technologies, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059967242","display_name":"Harshita Diddee","orcid":"https://orcid.org/0000-0002-0852-7371"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Harshita Diddee","raw_affiliation_strings":["AI4Bharat, India"],"affiliations":[{"raw_affiliation_string":"AI4Bharat, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034278383","display_name":"J. Mahalakshmi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mahalakshmi J","raw_affiliation_strings":["AI4Bharat, India"],"affiliations":[{"raw_affiliation_string":"AI4Bharat, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031343973","display_name":"Divyanshu Kakwani","orcid":null},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Divyanshu Kakwani","raw_affiliation_strings":["AI4Bharat, India","IIT Madras, India"],"affiliations":[{"raw_affiliation_string":"AI4Bharat, India","institution_ids":[]},{"raw_affiliation_string":"IIT Madras, India","institution_ids":["https://openalex.org/I24676775"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106623671","display_name":"Navneet Kumar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Navneet Kumar","raw_affiliation_strings":["EkStep Foundation, India","Tarento Technologies, India"],"affiliations":[{"raw_affiliation_string":"EkStep Foundation, India","institution_ids":[]},{"raw_affiliation_string":"Tarento Technologies, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040926086","display_name":"Aswin Pradeep","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aswin Pradeep","raw_affiliation_strings":["EkStep Foundation, India","Tarento Technologies, India"],"affiliations":[{"raw_affiliation_string":"EkStep Foundation, India","institution_ids":[]},{"raw_affiliation_string":"Tarento Technologies, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000767265","display_name":"Srihari Nagaraj","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Srihari Nagaraj","raw_affiliation_strings":["EkStep Foundation, India","Tarento Technologies, India"],"affiliations":[{"raw_affiliation_string":"EkStep Foundation, India","institution_ids":[]},{"raw_affiliation_string":"Tarento Technologies, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103536702","display_name":"K. Sai Deepak","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar Deepak","raw_affiliation_strings":["EkStep Foundation, India","Tarento Technologies, India"],"affiliations":[{"raw_affiliation_string":"EkStep Foundation, India","institution_ids":[]},{"raw_affiliation_string":"Tarento Technologies, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109888290","display_name":"V. Raghavan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vivek Raghavan","raw_affiliation_strings":["EkStep Foundation, India"],"affiliations":[{"raw_affiliation_string":"EkStep Foundation, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052909911","display_name":"Anoop Kunchukuttan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210162141","display_name":"Microsoft (India)","ror":"https://ror.org/04ww0w091","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210162141"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Anoop Kunchukuttan","raw_affiliation_strings":["AI4Bharat, India","Microsoft, India"],"affiliations":[{"raw_affiliation_string":"AI4Bharat, India","institution_ids":[]},{"raw_affiliation_string":"Microsoft, India","institution_ids":["https://openalex.org/I4210162141"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101614266","display_name":"Pratyush Kumar","orcid":"https://orcid.org/0000-0002-8732-0183"},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Pratyush Kumar","raw_affiliation_strings":["AI4Bharat, India","IIT Madras, India","RBCDSAI, India"],"affiliations":[{"raw_affiliation_string":"AI4Bharat, India","institution_ids":[]},{"raw_affiliation_string":"IIT Madras, India","institution_ids":["https://openalex.org/I24676775"]},{"raw_affiliation_string":"RBCDSAI, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035504104","display_name":"Mitesh Shantadevi Khapra","orcid":null},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Mitesh Shantadevi Khapra","raw_affiliation_strings":["AI4Bharat, India","IIT Madras, India","RBCDSAI, India"],"affiliations":[{"raw_affiliation_string":"AI4Bharat, India","institution_ids":[]},{"raw_affiliation_string":"IIT Madras, India","institution_ids":["https://openalex.org/I24676775"]},{"raw_affiliation_string":"RBCDSAI, India","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":18,"corresponding_author_ids":["https://openalex.org/A5102974082"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":14.866,"has_fulltext":true,"cited_by_count":116,"citation_normalized_percentile":{"value":0.99193938,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"10","issue":null,"first_page":"145","last_page":"162"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8833944797515869},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7350183725357056},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.6925426721572876},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6656521558761597},{"id":"https://openalex.org/keywords/parallel-corpora","display_name":"Parallel corpora","score":0.6365706920623779},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4636830687522888},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4468452036380768},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.38132667541503906},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.3083116412162781},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1247422993183136}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8833944797515869},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7350183725357056},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.6925426721572876},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6656521558761597},{"id":"https://openalex.org/C2985367798","wikidata":"https://www.wikidata.org/wiki/Q1346592","display_name":"Parallel corpora","level":3,"score":0.6365706920623779},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4636830687522888},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4468452036380768},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38132667541503906},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.3083116412162781},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1247422993183136},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/tacl_a_00452","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00452","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00452/1987010/tacl_a_00452.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:c9ea7cbf91d84513857037f88d330b57","is_oa":true,"landing_page_url":"https://doaj.org/article/c9ea7cbf91d84513857037f88d330b57","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Transactions of the Association for Computational Linguistics, Vol 10 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00452","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00452","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00452/1987010/tacl_a_00452.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3156404059.pdf","grobid_xml":"https://content.openalex.org/works/W3156404059.grobid-xml"},"referenced_works_count":112,"referenced_works":["https://openalex.org/W170711724","https://openalex.org/W572280432","https://openalex.org/W630532510","https://openalex.org/W1519842253","https://openalex.org/W1847618513","https://openalex.org/W1906341845","https://openalex.org/W2041532239","https://openalex.org/W2124509324","https://openalex.org/W2156984567","https://openalex.org/W2257408573","https://openalex.org/W2296073425","https://openalex.org/W2316579313","https://openalex.org/W2419539795","https://openalex.org/W2462305634","https://openalex.org/W2525778437","https://openalex.org/W2531621976","https://openalex.org/W2531638282","https://openalex.org/W2550821151","https://openalex.org/W2552515779","https://openalex.org/W2593864460","https://openalex.org/W2611838487","https://openalex.org/W2759088880","https://openalex.org/W2771576526","https://openalex.org/W2889511806","https://openalex.org/W2892004290","https://openalex.org/W2919290281","https://openalex.org/W2933138175","https://openalex.org/W2948902769","https://openalex.org/W2949303037","https://openalex.org/W2952509486","https://openalex.org/W2958953787","https://openalex.org/W2960374072","https://openalex.org/W2962784628","https://openalex.org/W2963216553","https://openalex.org/W2963247703","https://openalex.org/W2963281280","https://openalex.org/W2963310665","https://openalex.org/W2963341956","https://openalex.org/W2963506925","https://openalex.org/W2963633299","https://openalex.org/W2964308564","https://openalex.org/W2970279348","https://openalex.org/W2970360209","https://openalex.org/W2970925677","https://openalex.org/W2971120622","https://openalex.org/W2973088264","https://openalex.org/W2982533268","https://openalex.org/W2986148666","https://openalex.org/W2988451549","https://openalex.org/W2990138404","https://openalex.org/W3000965575","https://openalex.org/W3017604292","https://openalex.org/W3034696692","https://openalex.org/W3039149172","https://openalex.org/W3039695075","https://openalex.org/W3046368065","https://openalex.org/W3048539749","https://openalex.org/W3082928416","https://openalex.org/W3099919888","https://openalex.org/W3100806282","https://openalex.org/W3103064977","https://openalex.org/W3104652516","https://openalex.org/W3104688854","https://openalex.org/W3105425516","https://openalex.org/W3109864162","https://openalex.org/W3112593586","https://openalex.org/W3115711567","https://openalex.org/W3118415754","https://openalex.org/W3119866316","https://openalex.org/W3120929527","https://openalex.org/W3137010024","https://openalex.org/W3169483174","https://openalex.org/W3175301726","https://openalex.org/W4235036578","https://openalex.org/W4240904472","https://openalex.org/W4288025890","https://openalex.org/W4288284086","https://openalex.org/W4299574851","https://openalex.org/W4299585995","https://openalex.org/W4300963525","https://openalex.org/W4301187301","https://openalex.org/W4385245566","https://openalex.org/W6606950612","https://openalex.org/W6639953804","https://openalex.org/W6679434410","https://openalex.org/W6683188251","https://openalex.org/W6689336786","https://openalex.org/W6717262007","https://openalex.org/W6718924430","https://openalex.org/W6734897383","https://openalex.org/W6739901393","https://openalex.org/W6743965040","https://openalex.org/W6744973198","https://openalex.org/W6745064204","https://openalex.org/W6754156487","https://openalex.org/W6760136687","https://openalex.org/W6765386044","https://openalex.org/W6765469073","https://openalex.org/W6767105575","https://openalex.org/W6770014243","https://openalex.org/W6773387851","https://openalex.org/W6777399232","https://openalex.org/W6779941907","https://openalex.org/W6781811055","https://openalex.org/W6782410572","https://openalex.org/W6783447026","https://openalex.org/W6785065554","https://openalex.org/W6785198779","https://openalex.org/W6787839081","https://openalex.org/W6845401343","https://openalex.org/W6939216866","https://openalex.org/W7051469422"],"related_works":["https://openalex.org/W2786253471","https://openalex.org/W3175595715","https://openalex.org/W2604275745","https://openalex.org/W2986030184","https://openalex.org/W2104907655","https://openalex.org/W2985215540","https://openalex.org/W4307459710","https://openalex.org/W3155572818","https://openalex.org/W4293584592","https://openalex.org/W4285266806"],"abstract_inverted_index":{"Abstract":[0],"We":[1,62,155],"present":[2],"Samanantar,":[3],"the":[4,55,64,68,116,121,125,145,152,180],"largest":[5],"publicly":[6,43,173,190],"available":[7,44,174,189],"parallel":[8,45,65,126,147],"corpora":[9,119],"collection":[10,15,108],"for":[11,84,94,103,206],"Indic":[12,28,141,207],"languages.":[13,130,208],"The":[14],"contains":[16],"a":[17,59,106],"total":[18],"of":[19,109,113,124,182],"49.7":[20],"million":[21,38,51,135],"sentence":[22,39,52,136],"pairs":[23,40,53,137,143],"between":[24,138],"English":[25,150],"and":[26,47,75,97,170,186,193,203],"11":[27,129],"languages":[29,163],"(from":[30],"two":[31],"language":[32,142],"families).":[33],"Specifically,":[34],"we":[35,132,194],"compile":[36],"12.4":[37],"from":[41,54,67,87,115,144],"existing,":[42],"corpora,":[46,73,80],"additionally":[48],"mine":[49,63],"37.4":[50],"Web,":[56],"resulting":[57],"in":[58,105,201],"4\u00d7":[60],"increase.":[61],"sentences":[66,86,127],"Web":[69],"by":[70],"combining":[71],"many":[72],"tools,":[74],"methods:":[76],"(a)":[77],"Web-crawled":[78],"monolingual":[79],"(b)":[81],"document":[82],"OCR":[83],"extracting":[85],"scanned":[88],"documents,":[89],"(c)":[90],"multilingual":[91,157,204],"representation":[92],"models":[93,159,169,187],"aligning":[95],"sentences,":[96],"(d)":[98],"approximate":[99],"nearest":[100],"neighbor":[101],"search":[102],"searching":[104],"large":[107],"sentences.":[110],"Human":[111],"evaluation":[112],"samples":[114],"newly":[117],"mined":[118],"validate":[120],"high":[122],"quality":[123],"across":[128],"Further,":[131],"extract":[133],"83.4":[134],"all":[139,161],"55":[140],"English-centric":[146],"corpus":[148],"using":[149],"as":[151,177],"pivot":[153],"language.":[154],"trained":[156],"NMT":[158,202],"spanning":[160],"these":[162],"on":[164,172],"Samanantar":[165,192],"which":[166],"outperform":[167],"existing":[168],"baselines":[171],"benchmarks,":[175],"such":[176],"FLORES,":[178],"establishing":[179],"utility":[181],"Samanantar.":[183],"Our":[184],"data":[185],"are":[188],"at":[191],"hope":[195],"they":[196],"will":[197],"help":[198],"advance":[199],"research":[200],"NLP":[205]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":24},{"year":2023,"cited_by_count":44},{"year":2022,"cited_by_count":17},{"year":2021,"cited_by_count":6}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
