{"id":"https://openalex.org/W2898806654","doi":"https://doi.org/10.18653/v1/w18-5610","title":"Identification of Parallel Sentences in Comparable Monolingual Corpora from Different Registers","display_name":"Identification of Parallel Sentences in Comparable Monolingual Corpora from Different Registers","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2898806654","doi":"https://doi.org/10.18653/v1/w18-5610","mag":"2898806654"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w18-5610","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w18-5610","pdf_url":"https://www.aclweb.org/anthology/W18-5610.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W18-5610.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5025603922","display_name":"R\u00e9mi Cardon","orcid":"https://orcid.org/0000-0002-7858-3504"},"institutions":[{"id":"https://openalex.org/I2279609970","display_name":"Universit\u00e9 de Lille","ror":"https://ror.org/02kzqn938","country_code":"FR","type":"education","lineage":["https://openalex.org/I2279609970"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"R\u00e9mi Cardon","raw_affiliation_strings":["UMR CNRS 8163 -STL F-59000 Lille, France"],"affiliations":[{"raw_affiliation_string":"UMR CNRS 8163 -STL F-59000 Lille, France","institution_ids":["https://openalex.org/I2279609970","https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007781745","display_name":"Natalia Grabar","orcid":"https://orcid.org/0000-0002-0237-4554"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I2279609970","display_name":"Universit\u00e9 de Lille","ror":"https://ror.org/02kzqn938","country_code":"FR","type":"education","lineage":["https://openalex.org/I2279609970"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Natalia Grabar","raw_affiliation_strings":["UMR CNRS 8163 -STL F-59000 Lille, France"],"affiliations":[{"raw_affiliation_string":"UMR CNRS 8163 -STL F-59000 Lille, France","institution_ids":["https://openalex.org/I2279609970","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5025603922"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I2279609970"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12409256,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"83","last_page":"93"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8867142796516418},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.8645918369293213},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6977453231811523},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6429611444473267},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5897791385650635},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5868026614189148},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.5689343810081482},{"id":"https://openalex.org/keywords/parallel-corpora","display_name":"Parallel corpora","score":0.5513279438018799},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.4295649230480194},{"id":"https://openalex.org/keywords/semeval","display_name":"SemEval","score":0.41319015622138977},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4122336506843567},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.2911534607410431},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.16334179043769836},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.1578192412853241},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09147730469703674}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8867142796516418},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.8645918369293213},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6977453231811523},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6429611444473267},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5897791385650635},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5868026614189148},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.5689343810081482},{"id":"https://openalex.org/C2985367798","wikidata":"https://www.wikidata.org/wiki/Q1346592","display_name":"Parallel corpora","level":3,"score":0.5513279438018799},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.4295649230480194},{"id":"https://openalex.org/C44572571","wikidata":"https://www.wikidata.org/wiki/Q7448970","display_name":"SemEval","level":3,"score":0.41319015622138977},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4122336506843567},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2911534607410431},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.16334179043769836},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.1578192412853241},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09147730469703674},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.18653/v1/w18-5610","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w18-5610","pdf_url":"https://www.aclweb.org/anthology/W18-5610.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:halshs-01968351v1","is_oa":true,"landing_page_url":"https://shs.hal.science/halshs-01968351","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"LOUHI 2018:The Ninth International Workshop on Health Text Mining and Information Analysis, Oct 2018, Bruxelles, Belgium","raw_type":"Conference papers"},{"id":"pmh:oai:lilloa.univ-lille.fr:20.500.12210/64574","is_oa":true,"landing_page_url":"http://hdl.handle.net/20.500.12210/64574","pdf_url":null,"source":{"id":"https://openalex.org/S4306402203","display_name":"LillOA (Universit\u00e9 de Lille (University Of Lille))","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210123514","host_organization_name":"Centre d'Etudes en Civilisations, Langues et Litt\u00e9ratures Etrang\u00e8res","host_organization_lineage":["https://openalex.org/I4210123514"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.18653/v1/w18-5610","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w18-5610","pdf_url":"https://www.aclweb.org/anthology/W18-5610.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7900000214576721}],"awards":[{"id":"https://openalex.org/G3312464302","display_name":null,"funder_award_id":"CE19-0016","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G6803667113","display_name":null,"funder_award_id":"ANR-17","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G8067288029","display_name":"Communication, Literacy, Education, Accessibility, Readability","funder_award_id":"ANR-17-CE19-0016","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"}],"funders":[{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"},{"id":"https://openalex.org/F4320338463","display_name":"CHIST-ERA","ror":"https://ror.org/00rbzpz17"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2898806654.pdf","grobid_xml":"https://content.openalex.org/works/W2898806654.grobid-xml"},"referenced_works_count":70,"referenced_works":["https://openalex.org/W56743589","https://openalex.org/W75158669","https://openalex.org/W92412080","https://openalex.org/W569521333","https://openalex.org/W1486649854","https://openalex.org/W1489181569","https://openalex.org/W1493309689","https://openalex.org/W1566018662","https://openalex.org/W1647671624","https://openalex.org/W1751157425","https://openalex.org/W1847917847","https://openalex.org/W1952210680","https://openalex.org/W1967043694","https://openalex.org/W1973152633","https://openalex.org/W1978394996","https://openalex.org/W1995514968","https://openalex.org/W2000026602","https://openalex.org/W2001619934","https://openalex.org/W2003523627","https://openalex.org/W2008353316","https://openalex.org/W2040870580","https://openalex.org/W2053154970","https://openalex.org/W2061118075","https://openalex.org/W2061235289","https://openalex.org/W2083245912","https://openalex.org/W2093219119","https://openalex.org/W2100240599","https://openalex.org/W2102381086","https://openalex.org/W2104103102","https://openalex.org/W2106612368","https://openalex.org/W2106952837","https://openalex.org/W2107695330","https://openalex.org/W2109802560","https://openalex.org/W2109881807","https://openalex.org/W2130055902","https://openalex.org/W2133458109","https://openalex.org/W2135875128","https://openalex.org/W2140903445","https://openalex.org/W2141068210","https://openalex.org/W2152180407","https://openalex.org/W2152749681","https://openalex.org/W2155484602","https://openalex.org/W2159014656","https://openalex.org/W2159580878","https://openalex.org/W2161278885","https://openalex.org/W2166098990","https://openalex.org/W2168929382","https://openalex.org/W2181476823","https://openalex.org/W2250305120","https://openalex.org/W2250387780","https://openalex.org/W2250503148","https://openalex.org/W2251044566","https://openalex.org/W2251427843","https://openalex.org/W2251919380","https://openalex.org/W2461338233","https://openalex.org/W2462305634","https://openalex.org/W2508865106","https://openalex.org/W2523717353","https://openalex.org/W2565770050","https://openalex.org/W2576026769","https://openalex.org/W2734531809","https://openalex.org/W2743653651","https://openalex.org/W2963355447","https://openalex.org/W3208719332","https://openalex.org/W4213373262","https://openalex.org/W4233045210","https://openalex.org/W4239883048","https://openalex.org/W4299441026","https://openalex.org/W4388152766","https://openalex.org/W6601218445"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W4312814274","https://openalex.org/W1590307681","https://openalex.org/W2536018345","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W2353836703"],"abstract_inverted_index":{"Parallel":[0],"aligned":[1,73],"sentences":[2,69],"provide":[3],"useful":[4],"information":[5],"for":[6,19],"different":[7],"NLP":[8],"applications.":[9],"Yet,":[10],"this":[11],"kind":[12],"of":[13,65,88],"data":[14,79,116],"is":[15,58,70,113],"seldom":[16],"available,":[17],"especially":[18],"languages":[20],"other":[21],"than":[22],"English.":[23],"We":[24,84],"propose":[25],"to":[26,42,52,59,71,99,107,115,141],"exploit":[27,85],"comparable":[28],"corpora":[29,49],"in":[30,117,131,136],"French":[31],"which":[32,137],"are":[33,50,129],"distinguished":[34],"by":[35],"their":[36],"registers":[37],"(specialized":[38],"and":[39,44,67,90,103,127,133,143],"simplified":[40,68],"versions)":[41],"detect":[43],"align":[45],"parallel":[46],"sentences.":[47],"These":[48],"related":[51],"the":[53,110,120],"biomedical":[54],"area.":[55],"Our":[56],"purpose":[57],"state":[60],"whether":[61],"a":[62,86],"given":[63],"pair":[64],"specialized":[66],"be":[72],"or":[74],"not.":[75],"Manually":[76],"created":[77],"reference":[78],"show":[80,139],"0.76":[81],"inter-annotator":[82],"agreement.":[83],"set":[87],"features":[89,126],"several":[91],"automatic":[92,95],"classifiers.":[93],"The":[94,124],"alignment":[96],"reaches":[97],"up":[98,140],"0.93":[100],"Precision,":[101],"Recall":[102],"F-measure.":[104],"In":[105],"order":[106],"better":[108],"evaluate":[109],"method,":[111],"it":[112],"applied":[114,130],"English":[118],"from":[119],"SemEval":[121],"STS":[122],"competitions.":[123],"same":[125],"models":[128],"monolingual":[132],"cross-lingual":[134],"contexts,":[135],"they":[138],"0.90":[142],"0.73":[144],"F-measure,":[145],"respectively.":[146]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
