{"id":"https://openalex.org/W2609414830","doi":"https://doi.org/10.18653/v1/p17-2037","title":"Cross-lingual and cross-domain discourse segmentation of entire documents","display_name":"Cross-lingual and cross-domain discourse segmentation of entire documents","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2609414830","doi":"https://doi.org/10.18653/v1/p17-2037","mag":"2609414830"},"language":"en","primary_location":{"id":"doi:10.18653/v1/p17-2037","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p17-2037","pdf_url":"https://www.aclweb.org/anthology/P17-2037.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 55th Annual Meeting of the Association for\n          Computational Linguistics (Volume 2: Short Papers)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/P17-2037.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044698676","display_name":"Chlo\u00e9 Braud","orcid":"https://orcid.org/0000-0002-1874-3430"},"institutions":[{"id":"https://openalex.org/I124055696","display_name":"University of Copenhagen","ror":"https://ror.org/035b05819","country_code":"DK","type":"education","lineage":["https://openalex.org/I124055696"]}],"countries":["DK"],"is_corresponding":true,"raw_author_name":"Chlo\u00e9 Braud","raw_affiliation_strings":["University of Copenhagen, Copenhagen, Denmark"],"affiliations":[{"raw_affiliation_string":"University of Copenhagen, Copenhagen, Denmark","institution_ids":["https://openalex.org/I124055696"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047568486","display_name":"Oph\u00e9lie Lacroix","orcid":null},"institutions":[{"id":"https://openalex.org/I124055696","display_name":"University of Copenhagen","ror":"https://ror.org/035b05819","country_code":"DK","type":"education","lineage":["https://openalex.org/I124055696"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Oph\u00e9lie Lacroix","raw_affiliation_strings":["CoAStaL DIKU University of Copenhagen University Park 5, 2100 Copenhagen","University of Copenhagen, Copenhagen, Denmark"],"affiliations":[{"raw_affiliation_string":"CoAStaL DIKU University of Copenhagen University Park 5, 2100 Copenhagen","institution_ids":["https://openalex.org/I124055696"]},{"raw_affiliation_string":"University of Copenhagen, Copenhagen, Denmark","institution_ids":["https://openalex.org/I124055696"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018138946","display_name":"Anders S\u00f8gaard","orcid":"https://orcid.org/0000-0001-5250-4276"},"institutions":[{"id":"https://openalex.org/I124055696","display_name":"University of Copenhagen","ror":"https://ror.org/035b05819","country_code":"DK","type":"education","lineage":["https://openalex.org/I124055696"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Anders S\u00f8gaard","raw_affiliation_strings":["CoAStaL DIKU University of Copenhagen University Park 5, 2100 Copenhagen","University of Copenhagen, Copenhagen, Denmark"],"affiliations":[{"raw_affiliation_string":"CoAStaL DIKU University of Copenhagen University Park 5, 2100 Copenhagen","institution_ids":["https://openalex.org/I124055696"]},{"raw_affiliation_string":"University of Copenhagen, Copenhagen, Denmark","institution_ids":["https://openalex.org/I124055696"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5044698676"],"corresponding_institution_ids":["https://openalex.org/I124055696"],"apc_list":null,"apc_paid":null,"fwci":0.4154,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.70785146,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7936477661132812},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.7709946632385254},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7194973826408386},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.6743858456611633},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6440392732620239},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6353240609169006},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5403980612754822},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5063737630844116},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4626511037349701},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.45455941557884216},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11665251851081848}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7936477661132812},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.7709946632385254},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7194973826408386},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.6743858456611633},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6440392732620239},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6353240609169006},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5403980612754822},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5063737630844116},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4626511037349701},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.45455941557884216},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11665251851081848},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.18653/v1/p17-2037","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p17-2037","pdf_url":"https://www.aclweb.org/anthology/P17-2037.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 55th Annual Meeting of the Association for\n          Computational Linguistics (Volume 2: Short Papers)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1704.04100","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1704.04100","pdf_url":"https://arxiv.org/pdf/1704.04100","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2609414830","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1704.04100.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:HAL:hal-02373807v1","is_oa":true,"landing_page_url":"https://hal.science/hal-02373807","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Annual Meeting of the Association for Computational Linguistics (ACL), Jul 2017, Vancouver, Canada. pp.237 - 243, &#x27E8;10.18653/v1/P17-2037&#x27E9;","raw_type":"Conference papers"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/41d95411-625e-4892-9fc3-c7c030851f71","is_oa":false,"landing_page_url":"https://researchprofiles.ku.dk/da/publications/41d95411-625e-4892-9fc3-c7c030851f71","pdf_url":null,"source":{"id":"https://openalex.org/S4306401983","display_name":"Research at the University of Copenhagen (University of Copenhagen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I124055696","host_organization_name":"University of Copenhagen","host_organization_lineage":["https://openalex.org/I124055696"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Braud , C , Lacroix , O & S\u00f8gaard , A 2017 , Cross-lingual and cross-domain discourse segmentation of entire documents . in Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics : Short papers . vol. 2 , Association for Computational Linguistics , pp. 237-243 , 55th Annual Meeting of the Association for Computational Linguistics, ACL 2017 , Vancouver , Canada , 30/07/2017 . https://doi.org/10.18653/v1/P17-2037","raw_type":"contributionToPeriodical"},{"id":"doi:10.48550/arxiv.1704.04100","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1704.04100","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/p17-2037","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p17-2037","pdf_url":"https://www.aclweb.org/anthology/P17-2037.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 55th Annual Meeting of the Association for\n          Computational Linguistics (Volume 2: Short Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.8500000238418579,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2609414830.pdf","grobid_xml":"https://content.openalex.org/works/W2609414830.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W1580880669","https://openalex.org/W1614862348","https://openalex.org/W1936145620","https://openalex.org/W2028953697","https://openalex.org/W2045738181","https://openalex.org/W2064675550","https://openalex.org/W2130867674","https://openalex.org/W2154407881","https://openalex.org/W2158899491","https://openalex.org/W2162763612","https://openalex.org/W2165887782","https://openalex.org/W2186590411","https://openalex.org/W2250767751","https://openalex.org/W2251293245","https://openalex.org/W2251496369","https://openalex.org/W2251741120","https://openalex.org/W2341311217","https://openalex.org/W2347958142","https://openalex.org/W2461928176","https://openalex.org/W2567942006","https://openalex.org/W2572114121","https://openalex.org/W2583797781","https://openalex.org/W2964090065"],"related_works":["https://openalex.org/W2952979538","https://openalex.org/W2336840621","https://openalex.org/W3098744621","https://openalex.org/W3212833915","https://openalex.org/W2963631343","https://openalex.org/W2891068404","https://openalex.org/W2948641826","https://openalex.org/W2394856069","https://openalex.org/W3213710625","https://openalex.org/W2949604985","https://openalex.org/W2981294775","https://openalex.org/W2250773991","https://openalex.org/W2941819978","https://openalex.org/W3096841893","https://openalex.org/W2756639730","https://openalex.org/W3118106810","https://openalex.org/W2972572821","https://openalex.org/W3176138503","https://openalex.org/W3008039793","https://openalex.org/W3119707607"],"abstract_inverted_index":{"Discourse":[0],"segmentation":[1],"is":[2,88],"a":[3,17,91],"crucial":[4],"step":[5],"in":[6,107,123],"building":[7],"end-to-end":[8],"discourse":[9,12,60,82],"parsers.":[10],"However,":[11],"segmenters":[13,61,83],"only":[14,24],"exist":[15],"for":[16,62,90,101,120],"few":[18],"languages":[19,51,64,122],"and":[20,33,36,42,52,65,112,116],"domains.":[21,53],"Typically":[22],"they":[23],"detect":[25],"intra-sentential":[26],"segment":[27],"boundaries,":[28],"assuming":[29],"gold":[30,73],"standard":[31],"sentence":[32],"token":[34],"segmentation,":[35],"relying":[37],"on":[38,72,109],"high-quality":[39],"syntactic":[40],"parses":[41],"rich":[43],"heuristics":[44],"that":[45,68],"are":[46],"not":[47,70],"generally":[48],"available":[49,89],"across":[50],"In":[54],"this":[55],"paper,":[56],"we":[57,113],"propose":[58],"statistical":[59],"five":[63,121],"three":[66],"domains":[67],"do":[69],"rely":[71],"preannotations.":[74],"We":[75],"also":[76],"consider":[77],"the":[78],"problem":[79],"of":[80],"learning":[81],"when":[84],"no":[85],"labeled":[86],"data":[87],"language.":[92],"Our":[93],"fully":[94],"supervised":[95,115],"system":[96],"obtains":[97],"89.5%":[98],"F":[99],"1":[100],"English":[102],"newswire,":[103],"with":[104],"slight":[105],"drops":[106],"performance":[108],"other":[110],"domains,":[111],"report":[114],"unsupervised":[117],"(cross-lingual)":[118],"results":[119],"total.":[124]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
