{"id":"https://openalex.org/W2130614777","doi":"https://doi.org/10.1109/slt.2008.4777844","title":"Efficient sentence segmentation using syntactic features","display_name":"Efficient sentence segmentation using syntactic features","publication_year":2008,"publication_date":"2008-12-01","ids":{"openalex":"https://openalex.org/W2130614777","doi":"https://doi.org/10.1109/slt.2008.4777844","mag":"2130614777"},"language":"en","primary_location":{"id":"doi:10.1109/slt.2008.4777844","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2008.4777844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE Spoken Language Technology Workshop","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071505335","display_name":"Beno\u00eet Favre","orcid":"https://orcid.org/0000-0002-9777-4613"},"institutions":[{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Benoit Favre","raw_affiliation_strings":["International Computer Science Institute, Berkeley, USA","Int. Comput. Sci. Inst., Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"International Computer Science Institute, Berkeley, USA","institution_ids":["https://openalex.org/I1297971548"]},{"raw_affiliation_string":"Int. Comput. Sci. Inst., Berkeley, CA","institution_ids":["https://openalex.org/I1297971548"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015334291","display_name":"Dilek Hakkani-T\u00fcr","orcid":null},"institutions":[{"id":"https://openalex.org/I1297971548","display_name":"International Computer Science Institute","ror":"https://ror.org/01ewh7m12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1297971548"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dilek Hakkani-Tur","raw_affiliation_strings":["International Computer Science Institute, Berkeley, USA","Int. Comput. Sci. Inst., Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"International Computer Science Institute, Berkeley, USA","institution_ids":["https://openalex.org/I1297971548"]},{"raw_affiliation_string":"Int. Comput. Sci. Inst., Berkeley, CA","institution_ids":["https://openalex.org/I1297971548"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062886906","display_name":"Slav Petrov","orcid":"https://orcid.org/0000-0002-5505-4861"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Slav Petrov","raw_affiliation_strings":["Computer Science Division, University of California Berkeley, USA","Comput. Sci. Div., Univ. of California Berkeley, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Computer Science Division, University of California Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Comput. Sci. Div., Univ. of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004921249","display_name":"Dan Klein","orcid":"https://orcid.org/0000-0002-8881-1902"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dan Klein","raw_affiliation_strings":["Computer Science Division, University of California Berkeley, USA","Comput. Sci. Div., Univ. of California Berkeley, Berkeley, CA"],"affiliations":[{"raw_affiliation_string":"Computer Science Division, University of California Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Comput. Sci. Div., Univ. of California Berkeley, Berkeley, CA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5071505335"],"corresponding_institution_ids":["https://openalex.org/I1297971548"],"apc_list":null,"apc_paid":null,"fwci":3.59165359,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.9520303,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"24","issue":null,"first_page":"77","last_page":"80"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8452486395835876},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.7486879825592041},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.7263806462287903},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7024914026260376},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.650508463382721},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5826937556266785},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.541223406791687},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5173670649528503},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.5077894330024719},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.46435144543647766}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8452486395835876},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.7486879825592041},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.7263806462287903},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7024914026260376},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.650508463382721},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5826937556266785},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.541223406791687},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5173670649528503},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.5077894330024719},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.46435144543647766}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt.2008.4777844","is_oa":false,"landing_page_url":"https://doi.org/10.1109/slt.2008.4777844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE Spoken Language Technology Workshop","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6700000166893005,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W3313028","https://openalex.org/W94670513","https://openalex.org/W189129698","https://openalex.org/W204933015","https://openalex.org/W1504308419","https://openalex.org/W1551104980","https://openalex.org/W2097584131","https://openalex.org/W2158188757","https://openalex.org/W2161653775","https://openalex.org/W6600128079","https://openalex.org/W6603793917","https://openalex.org/W6607678144","https://openalex.org/W6632973184","https://openalex.org/W6683226453"],"related_works":["https://openalex.org/W579810227","https://openalex.org/W2952780262","https://openalex.org/W2979495269","https://openalex.org/W2375873920","https://openalex.org/W2392917763","https://openalex.org/W2083429127","https://openalex.org/W2146114872","https://openalex.org/W2392060890","https://openalex.org/W2358855848","https://openalex.org/W2095908250"],"abstract_inverted_index":{"To":[0],"enable":[1],"downstream":[2],"language":[3,107,140],"processing,automatic":[4],"speech":[5,148],"recognition":[6],"output":[7,150],"must":[8],"be":[9],"segmented":[10],"into":[11],"its":[12],"individual":[13],"sentences.":[14],"Previous":[15],"sentence":[16,135],"segmentation":[17,136],"systems":[18],"have":[19],"typically":[20],"been":[21],"very":[22],"local,using":[23],"low-level":[24,118],"prosodic":[25],"and":[26,146],"lexical":[27],"features":[28],"to":[29,35,57,74,88],"independently":[30],"decide":[31],"whether":[32],"or":[33],"not":[34],"segment":[36],"at":[37],"each":[38],"word":[39],"boundary":[40],"position.":[41],"In":[42],"this":[43],"work,we":[44],"leverage":[45],"global":[46,111],"syntactic":[47,51,68,106,112],"information":[48],"from":[49,151],"a":[50,78,121],"parser,":[52],"which":[53,82],"is":[54,71,83,96],"better":[55],"able":[56],"capture":[58],"long":[59],"distance":[60],"dependencies.":[61],"While":[62],"some":[63],"previous":[64],"work":[65],"has":[66],"included":[67],"features,":[69],"ours":[70],"the":[72,129],"first":[73],"do":[75],"so":[76],"in":[77,120],"tractable,":[79],"lattice-based":[80],"way,":[81],"crucial":[84],"for":[85,134],"scaling":[86],"up":[87],"long-sentence":[89],"contexts.":[90],"Specifically,":[91],"an":[92],"initial":[93],"hypothesis":[94],"lattice":[95],"constructed":[97],"using":[98],"local":[99,117],"features.":[100],"Candidate":[101],"sentences":[102],"are":[103,114],"then":[104],"assigned":[105],"model":[108,133],"scores.":[109],"These":[110],"scores":[113,119],"combined":[115],"with":[116],"log-linear":[122],"model.":[123],"The":[124],"resulting":[125],"system":[126],"significantly":[127],"outperforms":[128],"most":[130],"popular":[131],"long-span":[132],"(the":[137],"hidden":[138],"event":[139],"model)":[141],"on":[142],"both":[143],"reference":[144],"text":[145],"automatic":[147],"recognizer":[149],"news":[152],"broadcasts.":[153]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
