{"id":"https://openalex.org/W2110483955","doi":"https://doi.org/10.1017/s1351324997001629","title":"Finite state segmentation of discourse into clauses","display_name":"Finite state segmentation of discourse into clauses","publication_year":1996,"publication_date":"1996-12-01","ids":{"openalex":"https://openalex.org/W2110483955","doi":"https://doi.org/10.1017/s1351324997001629","mag":"2110483955"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324997001629","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324997001629","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050824789","display_name":"Eva Ejerhed","orcid":null},"institutions":[{"id":"https://openalex.org/I90267481","display_name":"Ume\u00e5 University","ror":"https://ror.org/05kb8h459","country_code":"SE","type":"education","lineage":["https://openalex.org/I90267481"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"EVA EJERHED","raw_affiliation_strings":["Department of Linguistics, University of Ume\u00e5, S-90187 Ume\u00e5, Sweden. e-mail:","Department of Linguistics, University of Ume\u00e5, S-90187 Ume\u00e5, Sweden. e-mail: ejerhed@ling.umu.se#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Linguistics, University of Ume\u00e5, S-90187 Ume\u00e5, Sweden. e-mail:","institution_ids":["https://openalex.org/I90267481"]},{"raw_affiliation_string":"Department of Linguistics, University of Ume\u00e5, S-90187 Ume\u00e5, Sweden. e-mail: ejerhed@ling.umu.se#TAB#","institution_ids":["https://openalex.org/I90267481"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5050824789"],"corresponding_institution_ids":["https://openalex.org/I90267481"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.17336294,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"2","issue":"4","first_page":"355","last_page":"364"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8508224487304688},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6629273295402527},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6058380007743835},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5917884111404419},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5547010898590088},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.46834155917167664},{"id":"https://openalex.org/keywords/newspaper","display_name":"Newspaper","score":0.46364015340805054},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.45487770438194275},{"id":"https://openalex.org/keywords/text-corpus","display_name":"Text corpus","score":0.43219631910324097},{"id":"https://openalex.org/keywords/market-segmentation","display_name":"Market segmentation","score":0.4269729256629944},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36397624015808105},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.34939271211624146}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8508224487304688},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6629273295402527},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6058380007743835},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5917884111404419},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5547010898590088},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.46834155917167664},{"id":"https://openalex.org/C201280247","wikidata":"https://www.wikidata.org/wiki/Q11032","display_name":"Newspaper","level":2,"score":0.46364015340805054},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.45487770438194275},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.43219631910324097},{"id":"https://openalex.org/C125308379","wikidata":"https://www.wikidata.org/wiki/Q363057","display_name":"Market segmentation","level":2,"score":0.4269729256629944},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36397624015808105},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.34939271211624146},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C112698675","wikidata":"https://www.wikidata.org/wiki/Q37038","display_name":"Advertising","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s1351324997001629","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324997001629","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1558668927","https://openalex.org/W1578408629","https://openalex.org/W2004570537","https://openalex.org/W2055438451","https://openalex.org/W2061271742","https://openalex.org/W2097125878","https://openalex.org/W2140482227","https://openalex.org/W2477816888","https://openalex.org/W2951562155"],"related_works":["https://openalex.org/W2376554757","https://openalex.org/W612150824","https://openalex.org/W2361959990","https://openalex.org/W1596512750","https://openalex.org/W2383443050","https://openalex.org/W2367702734","https://openalex.org/W4214823418","https://openalex.org/W3099325276","https://openalex.org/W1769291553","https://openalex.org/W563979812"],"abstract_inverted_index":{"The":[0,66,92,151],"paper":[1],"presents":[2],"background":[3],"and":[4,29,55,120,147,173],"motivation":[5],"for":[6],"a":[7,44,87],"processing":[8],"model":[9],"that":[10,15,158],"segments":[11],"discourse":[12],"into":[13,84],"units":[14],"are":[16],"simple,":[17],"non-nested":[18],"clauses,":[19,85],"prior":[20],"to":[21,169,178],"the":[22,48,59,100,122,159],"recognition":[23],"of":[24,34,39,47,62,69,78,108,115,132,143,153],"clause":[25,89,93,161],"internal":[26],"phrasal":[27],"constituents,":[28],"experimental":[30],"results":[31,40,70,152],"in":[32,52,75],"support":[33],"this":[35],"model.":[36],"One":[37],"set":[38,68],"is":[41,71,97,164],"derived":[42,72],"from":[43,73,99,111,121],"statistical":[45],"reanalysis":[46],"Swedish":[49,81,109,133],"empirical":[50],"data":[51,96],"Strangert,":[53],"Ejerhed":[54],"Huber":[56],"1993":[57,127],"concerning":[58],"linguistic":[60],"structure":[61],"major":[63],"prosodic":[64],"units.":[65],"other":[67],"experiments":[74,156],"segmenting":[76],"part":[77,114],"speech":[79,116],"annotated":[80,117],"text":[82],"corpora":[83],"using":[86],"new":[88],"segmentation":[90,162],"algorithm.":[91],"segmented":[94],"corpus":[95,124],"taken":[98],"Stockholm":[101],"Ume\u00e5":[102,123],"Corpus":[103],"(SUC),":[104],"1":[105],"M":[106,130],"words":[107,131],"texts":[110],"different":[112],"genres,":[113],"by":[118,138],"hand,":[119],"DAGENS":[125],"INDUSTRI":[126],"(DI93),":[128],"5":[129],"financial":[134],"newspaper":[135],"text,":[136,172],"processed":[137],"fully":[139],"automatic":[140],"means":[141],"consisting":[142],"tokenizing,":[144],"lexical":[145],"analysis,":[146],"probabilistic":[148],"POS":[149],"tagging.":[150],"these":[154],"two":[155],"show":[157],"proposed":[160],"algorithm":[163],"96%":[165],"correct":[166,175],"when":[167,176],"applied":[168,177],"manually":[170],"tagged":[171,180],"91%":[174],"probabilistically":[179],"text.":[181]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
