{"id":"https://openalex.org/W3210457608","doi":"https://doi.org/10.5281/zenodo.3354711","title":"MaSS - Multilingual corpus of Sentence-aligned Spoken utterances","display_name":"MaSS - Multilingual corpus of Sentence-aligned Spoken utterances","publication_year":2019,"publication_date":"2019-07-30","ids":{"openalex":"https://openalex.org/W3210457608","doi":"https://doi.org/10.5281/zenodo.3354711","mag":"3210457608"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:3354711","is_oa":true,"landing_page_url":"https://zenodo.org/record/3354711","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/3354711","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043744108","display_name":"Marcely Zanon Boito","orcid":"https://orcid.org/0000-0003-0134-6719"},"institutions":[{"id":"https://openalex.org/I4400008911","display_name":"Institut des langues et cultures d'Europe, Am\u00e9rique, Afrique, Asie et Australie","ror":"https://ror.org/02bxdh068","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4400008911"]},{"id":"https://openalex.org/I899635006","display_name":"Universit\u00e9 Grenoble Alpes","ror":"https://ror.org/02rx3b187","country_code":"FR","type":"education","lineage":["https://openalex.org/I899635006"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Boito, Marcely Zanon","raw_affiliation_strings":["Universit\u00e9 Grenoble Alpes"],"raw_orcid":"https://orcid.org/0000-0003-0134-6719","affiliations":[{"raw_affiliation_string":"Universit\u00e9 Grenoble Alpes","institution_ids":["https://openalex.org/I899635006","https://openalex.org/I4400008911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008481817","display_name":"William N. Havard","orcid":"https://orcid.org/0000-0002-1226-4156"},"institutions":[{"id":"https://openalex.org/I4400008911","display_name":"Institut des langues et cultures d'Europe, Am\u00e9rique, Afrique, Asie et Australie","ror":"https://ror.org/02bxdh068","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4400008911"]},{"id":"https://openalex.org/I899635006","display_name":"Universit\u00e9 Grenoble Alpes","ror":"https://ror.org/02rx3b187","country_code":"FR","type":"education","lineage":["https://openalex.org/I899635006"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Havard, William N.","raw_affiliation_strings":["Universit\u00e9 Grenoble Alpes"],"raw_orcid":"https://orcid.org/0000-0002-1226-4156","affiliations":[{"raw_affiliation_string":"Universit\u00e9 Grenoble Alpes","institution_ids":["https://openalex.org/I899635006","https://openalex.org/I4400008911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014411201","display_name":"Mahault Garnerin","orcid":null},"institutions":[{"id":"https://openalex.org/I4400008911","display_name":"Institut des langues et cultures d'Europe, Am\u00e9rique, Afrique, Asie et Australie","ror":"https://ror.org/02bxdh068","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4400008911"]},{"id":"https://openalex.org/I899635006","display_name":"Universit\u00e9 Grenoble Alpes","ror":"https://ror.org/02rx3b187","country_code":"FR","type":"education","lineage":["https://openalex.org/I899635006"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Mahault Garnerin","raw_affiliation_strings":["Universit\u00e9 Grenoble Alpes"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Grenoble Alpes","institution_ids":["https://openalex.org/I899635006","https://openalex.org/I4400008911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049992651","display_name":"\u00c9ric Le Ferrand","orcid":"https://orcid.org/0000-0001-7626-2988"},"institutions":[{"id":"https://openalex.org/I4400008911","display_name":"Institut des langues et cultures d'Europe, Am\u00e9rique, Afrique, Asie et Australie","ror":"https://ror.org/02bxdh068","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4400008911"]},{"id":"https://openalex.org/I899635006","display_name":"Universit\u00e9 Grenoble Alpes","ror":"https://ror.org/02rx3b187","country_code":"FR","type":"education","lineage":["https://openalex.org/I899635006"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"\u00c9ric Le Ferrand","raw_affiliation_strings":["Universit\u00e9 Grenoble Alpes"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Grenoble Alpes","institution_ids":["https://openalex.org/I899635006","https://openalex.org/I4400008911"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040820339","display_name":"Laurent Besacier","orcid":"https://orcid.org/0000-0001-7411-9125"},"institutions":[{"id":"https://openalex.org/I4400008911","display_name":"Institut des langues et cultures d'Europe, Am\u00e9rique, Afrique, Asie et Australie","ror":"https://ror.org/02bxdh068","country_code":"FR","type":"facility","lineage":["https://openalex.org/I4400008911"]},{"id":"https://openalex.org/I899635006","display_name":"Universit\u00e9 Grenoble Alpes","ror":"https://ror.org/02rx3b187","country_code":"FR","type":"education","lineage":["https://openalex.org/I899635006"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Besacier, Laurent","raw_affiliation_strings":["Universit\u00e9 Grenoble Alpes"],"raw_orcid":"https://orcid.org/0000-0001-7411-9125","affiliations":[{"raw_affiliation_string":"Universit\u00e9 Grenoble Alpes","institution_ids":["https://openalex.org/I899635006","https://openalex.org/I4400008911"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.8618000149726868,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.8618000149726868,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.8353999853134155,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.6810298562049866},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.6401803493499756},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4766607880592346},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4754982888698578},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3610287308692932},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.07600763440132141}],"concepts":[{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.6810298562049866},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.6401803493499756},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4766607880592346},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4754982888698578},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3610287308692932},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.07600763440132141}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:3354711","is_oa":true,"landing_page_url":"https://zenodo.org/record/3354711","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.3354711","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.3354711","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:3354711","is_oa":true,"landing_page_url":"https://zenodo.org/record/3354711","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[{"score":0.5699999928474426,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W3204019825"],"abstract_inverted_index":{"<strong>Abstract</strong>":[0],"The":[1,102,129],"CMU":[2],"Wilderness":[3],"Multilingual":[4],"Speech":[5,28],"Dataset":[6],"is":[7,48,55,135,199,220],"a":[8,76,142,200,233,258],"newly":[9],"published":[10],"multilingual":[11,66],"speech":[12,69],"dataset":[13,80],"based":[14],"on":[15,116,122,141],"recorded":[16],"readings":[17],"of":[18,81,98,131,190,206],"the":[19,40,43,49,53,132,155,160,188,191,195,204,207,213,226,239,244,253,271],"New":[20],"Testament.":[21],"It":[22],"provides":[23],"data":[24,161],"to":[25,58,64,158,187],"build":[26,159],"Automatic":[27],"Recognition":[29],"(ASR)":[30],"and":[31,74,78,112,209],"Text-to-Speech":[32],"(TTS)":[33],"models":[34],"for":[35,51,124,176,232],"potentially":[36],"700":[37],"languages.":[38],"However,":[39],"fact":[41],"that":[42],"source":[44],"content":[45],"(the":[46],"Bible),":[47],"same":[50],"all":[52],"languages":[54,88,104],"not":[56,230],"exploited":[57],"date.":[59],"Therefore,":[60],"this":[61,93],"article":[62],"proposes":[63],"add":[65],"links":[67],"between":[68,203,262],"segments":[70],"in":[71,194,212,225,243,252],"different":[72],"languages,":[73],"shares":[75],"large":[77],"clean":[79],"8,130":[82],"para-lel":[83],"spoken":[84],"utterances":[85],"across":[86],"8":[87,147,172],"(56":[89],"language":[90,127,148,235],"pairs).We":[91],"name":[92],"corpus":[94,97,134,143],"MaSS":[95],"(Multilingual":[96],"Sentence-aligned":[99],"Spoken":[100],"utterances).":[101],"covered":[103],"(Basque,":[105],"English,":[106],"Finnish,":[107],"French,":[108],"Hungarian,":[109],"Romanian,":[110],"Russian":[111],"Spanish)":[113],"allow":[114],"researches":[115],"speech-to-speech":[117],"alignment":[118],"as":[119,121],"well":[120],"translation":[123],"syntactically":[125],"divergent":[126],"pairs.":[128],"quality":[130],"final":[133],"attested":[136],"by":[137,238,249],"human":[138],"evaluation":[139],"performed":[140],"subset":[144],"(100":[145],"utterances,":[146],"pairs).":[149],"Paper":[150],"|":[151],"GitHub":[152],"Repository":[153],"containing":[154],"scripts":[156],"needed":[157],"set":[162],"from":[163],"scratch":[164],"(if":[165],"needed)":[166],"<strong>Project":[167],"structure</strong>":[168],"This":[169],"repository":[170],"contains":[171],"Numpy":[173,227,254],"files,":[174,255],"one":[175],"each":[177,263],"featured":[178],"language,":[179],"pickled":[180],"with":[181,217,270],"Python":[182],"3.6.":[183],"Each":[184],"line":[185],"corresponds":[186],"spectrogram":[189],"file":[192,196],"mentioned":[193],"<em>verses.csv</em>.":[197],"There":[198],"direct":[201],"mapping":[202],"ID":[205,218],"verse":[208,216],"its":[210],"index":[211,223],"list":[214],"(thus":[215],"5634":[219,224],"located":[221],"at":[222],"file).":[228],"Verses":[229],"available":[231],"given":[234],"(as":[236],"stated":[237],"value":[240],"\"Not":[241],"Available\"":[242],"CSV":[245],"file)":[246],"are":[247],"represented":[248],"empty":[250],"lists":[251],"thus":[256],"ensuring":[257],"perfect":[259],"verse-to-verse":[260],"alignement":[261],"file.":[264],"Spectrogram":[265],"were":[266],"extracted":[267],"using":[268],"Librosa":[269],"following":[272],"parameters:":[273],"<pre><code>Pre-emphasis":[274],"=":[275,279,283,287,291,295,299],"0.97":[276],"Sample":[277],"rate":[278],"16000":[280],"Window":[281,285,289],"size":[282],"0.025":[284],"stride":[286],"0.01":[288],"type":[290],"'hamming'":[292],"Mel":[293],"coefficients":[294],"40":[296],"Min":[297],"frequency":[298],"20</code></pre>":[300]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
