{"id":"https://openalex.org/W2117413750","doi":"https://doi.org/10.1145/1149290.1151098","title":"Broad coverage paragraph segmentation across languages and domains","display_name":"Broad coverage paragraph segmentation across languages and domains","publication_year":2006,"publication_date":"2006-07-01","ids":{"openalex":"https://openalex.org/W2117413750","doi":"https://doi.org/10.1145/1149290.1151098","mag":"2117413750"},"language":"en","primary_location":{"id":"doi:10.1145/1149290.1151098","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1149290.1151098","pdf_url":null,"source":{"id":"https://openalex.org/S200945739","display_name":"ACM Transactions on Speech and Language Processing","issn_l":"1550-4875","issn":["1550-4875","1550-4883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Speech and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010144873","display_name":"Caroline Sporleder","orcid":"https://orcid.org/0000-0002-5912-7028"},"institutions":[{"id":"https://openalex.org/I193700539","display_name":"Tilburg University","ror":"https://ror.org/04b8v1s79","country_code":"NL","type":"education","lineage":["https://openalex.org/I193700539"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Caroline Sporleder","raw_affiliation_strings":["Tilburg University, LE Tilburg, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Tilburg University, LE Tilburg, The Netherlands","institution_ids":["https://openalex.org/I193700539"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041024491","display_name":"Mirella Lapata","orcid":"https://orcid.org/0000-0002-2107-1516"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mirella Lapata","raw_affiliation_strings":["University of Edinburgh, Edinburgh, UK","University of Edinburgh, Edinburgh , UK"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh, Edinburgh, UK","institution_ids":["https://openalex.org/I98677209"]},{"raw_affiliation_string":"University of Edinburgh, Edinburgh , UK","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5010144873"],"corresponding_institution_ids":["https://openalex.org/I193700539"],"apc_list":null,"apc_paid":null,"fwci":2.326,"has_fulltext":false,"cited_by_count":25,"citation_normalized_percentile":{"value":0.89595055,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"3","issue":"2","first_page":"1","last_page":"35"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8837498426437378},{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.8835076689720154},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.7733553051948547},{"id":"https://openalex.org/keywords/punctuation","display_name":"Punctuation","score":0.6885475516319275},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6856395602226257},{"id":"https://openalex.org/keywords/structuring","display_name":"Structuring","score":0.6210405230522156},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6158448457717896},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6128729581832886},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5417312979698181},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.47583746910095215},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08258768916130066}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8837498426437378},{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.8835076689720154},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.7733553051948547},{"id":"https://openalex.org/C540372491","wikidata":"https://www.wikidata.org/wiki/Q82622","display_name":"Punctuation","level":2,"score":0.6885475516319275},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6856395602226257},{"id":"https://openalex.org/C2775945657","wikidata":"https://www.wikidata.org/wiki/Q381442","display_name":"Structuring","level":2,"score":0.6210405230522156},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6158448457717896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6128729581832886},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5417312979698181},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.47583746910095215},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08258768916130066},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/1149290.1151098","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1149290.1151098","pdf_url":null,"source":{"id":"https://openalex.org/S200945739","display_name":"ACM Transactions on Speech and Language Processing","issn_l":"1550-4875","issn":["1550-4875","1550-4883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Speech and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:tilburguniversity.edu:publications/f8b922f9-4f41-41a8-8996-58f17a378daa","is_oa":false,"landing_page_url":"https://research.tilburguniversity.edu/en/publications/f8b922f9-4f41-41a8-8996-58f17a378daa","pdf_url":null,"source":{"id":"https://openalex.org/S4306401490","display_name":"Research portal (Tilburg University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I193700539","host_organization_name":"Tilburg University","host_organization_lineage":["https://openalex.org/I193700539"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.329.1883","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.329.1883","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://homepages.inf.ed.ac.uk/mlap/Papers/p1-sporleder.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.99.2645","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.99.2645","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ilk.uvt.nl/~caroline/papers/tslp06.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7900000214576721}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W89847322","https://openalex.org/W136741500","https://openalex.org/W240701929","https://openalex.org/W362760882","https://openalex.org/W1483817654","https://openalex.org/W1490831147","https://openalex.org/W1490960179","https://openalex.org/W1495373554","https://openalex.org/W1516768428","https://openalex.org/W1525245406","https://openalex.org/W1535015163","https://openalex.org/W1552626377","https://openalex.org/W1557074680","https://openalex.org/W1573514622","https://openalex.org/W1597655096","https://openalex.org/W1603598191","https://openalex.org/W1604285433","https://openalex.org/W1626945812","https://openalex.org/W1673478674","https://openalex.org/W1710422233","https://openalex.org/W1814257755","https://openalex.org/W1828401780","https://openalex.org/W1988253520","https://openalex.org/W2002617716","https://openalex.org/W2002664886","https://openalex.org/W2005252629","https://openalex.org/W2010523086","https://openalex.org/W2013056791","https://openalex.org/W2013568550","https://openalex.org/W2018560257","https://openalex.org/W2032210760","https://openalex.org/W2053463056","https://openalex.org/W2080179128","https://openalex.org/W2084430736","https://openalex.org/W2100750861","https://openalex.org/W2100873065","https://openalex.org/W2102381086","https://openalex.org/W2104364170","https://openalex.org/W2107402508","https://openalex.org/W2134731454","https://openalex.org/W2148818577","https://openalex.org/W2149367074","https://openalex.org/W2159083595","https://openalex.org/W2166501286","https://openalex.org/W2426479676","https://openalex.org/W2593831809","https://openalex.org/W2601748485","https://openalex.org/W2915027006","https://openalex.org/W2949496004","https://openalex.org/W3100435646","https://openalex.org/W4232814231","https://openalex.org/W4244952642","https://openalex.org/W6632145054","https://openalex.org/W6637213893","https://openalex.org/W6674877832","https://openalex.org/W6785765844"],"related_works":["https://openalex.org/W2936002343","https://openalex.org/W2188883480","https://openalex.org/W1592364192","https://openalex.org/W656840002","https://openalex.org/W1605117403","https://openalex.org/W2381416480","https://openalex.org/W2380599343","https://openalex.org/W2364789806","https://openalex.org/W2908111806","https://openalex.org/W2580041870"],"abstract_inverted_index":{"This":[0],"article":[1],"considers":[2],"the":[3,58,93,131],"problem":[4],"of":[5,70,110,133],"automatic":[6,45],"paragraph":[7,25,63],"segmentation.":[8],"The":[9],"task":[10],"is":[11,127],"relevant":[12],"for":[13,129],"speech-to-text":[14],"applications":[15,37],"whose":[16],"output":[17,132],"transcipts":[18],"do":[19],"not":[20],"usually":[21],"contain":[22],"punctuation":[23],"or":[24],"indentation":[26],"and":[27,33,53,77,80,87,100,123],"are":[28],"naturally":[29],"difficult":[30],"to":[31,57,105],"read":[32],"process.":[34],"Text-to-text":[35],"generation":[36],"(e.g.,":[38],"summarization)":[39],"could":[40],"also":[41],"benefit":[42],"from":[43],"an":[44],"paragaraph":[46],"segementation":[47],"mechanism":[48],"which":[49,66],"indicates":[50],"topic":[51],"shifts":[52],"provides":[54],"visual":[55],"targets":[56],"reader.":[59],"We":[60],"present":[61],"a":[62,68,107,119],"segmentation":[64],"model":[65],"exploits":[67],"variety":[69],"knowledge":[71],"sources":[72],"(including":[73],"textual":[74],"cues,":[75],"syntactic":[76],"discourse-related":[78],"information)":[79],"evaluate":[81],"its":[82],"performance":[83],"in":[84,101],"different":[85],"languages":[86],"domains.":[88],"Our":[89],"experiments":[90],"demonstrate":[91],"that":[92,125],"proposed":[94],"approach":[95],"significantly":[96],"outperforms":[97],"our":[98,116],"baselines":[99],"many":[102],"cases":[103],"comes":[104],"within":[106],"few":[108],"percent":[109],"human":[111],"performance.":[112],"Finally,":[113],"we":[114],"integrate":[115],"method":[117],"with":[118],"single":[120],"document":[121],"summarizer":[122],"show":[124],"it":[126],"useful":[128],"structuring":[130],"automatically":[134],"generated":[135],"text.":[136]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
