{"id":"https://openalex.org/W2127285286","doi":"https://doi.org/10.1145/1878101.1878109","title":"Story segmentation for speech transcripts in sparse data conditions","display_name":"Story segmentation for speech transcripts in sparse data conditions","publication_year":2010,"publication_date":"2010-10-29","ids":{"openalex":"https://openalex.org/W2127285286","doi":"https://doi.org/10.1145/1878101.1878109","mag":"2127285286"},"language":"en","primary_location":{"id":"doi:10.1145/1878101.1878109","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1878101.1878109","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2010 international workshop on Searching spontaneous conversational speech","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003498005","display_name":"Laurens van der Werff","orcid":null},"institutions":[{"id":"https://openalex.org/I94624287","display_name":"University of Twente","ror":"https://ror.org/006hf6230","country_code":"NL","type":"education","lineage":["https://openalex.org/I94624287"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Laurens van der Werff","raw_affiliation_strings":["University of Twente, Enschede, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Twente, Enschede, Netherlands","institution_ids":["https://openalex.org/I94624287"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5003498005"],"corresponding_institution_ids":["https://openalex.org/I94624287"],"apc_list":null,"apc_paid":null,"fwci":0.9327,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.81833702,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"33","last_page":"38"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8541532754898071},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6065735816955566},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.5814887285232544},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5446544885635376},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.529505729675293},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4919295310974121},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.4907153844833374},{"id":"https://openalex.org/keywords/wordnet","display_name":"WordNet","score":0.4802020788192749},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4792899489402771},{"id":"https://openalex.org/keywords/market-segmentation","display_name":"Market segmentation","score":0.4424835741519928},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4291597604751587},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.38244885206222534}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8541532754898071},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6065735816955566},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.5814887285232544},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5446544885635376},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.529505729675293},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4919295310974121},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.4907153844833374},{"id":"https://openalex.org/C157659113","wikidata":"https://www.wikidata.org/wiki/Q533822","display_name":"WordNet","level":2,"score":0.4802020788192749},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4792899489402771},{"id":"https://openalex.org/C125308379","wikidata":"https://www.wikidata.org/wiki/Q363057","display_name":"Market segmentation","level":2,"score":0.4424835741519928},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4291597604751587},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.38244885206222534},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1878101.1878109","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1878101.1878109","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2010 international workshop on Searching spontaneous conversational speech","raw_type":"proceedings-article"},{"id":"pmh:oai:ris.utwente.nl:openaire_cris_publications/c48d8485-f2b1-48dd-a72d-c83d3f13a406","is_oa":false,"landing_page_url":"https://research.utwente.nl/en/publications/c48d8485-f2b1-48dd-a72d-c83d3f13a406","pdf_url":null,"source":{"id":"https://openalex.org/S4406922991","display_name":"University of Twente Research Information","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"van der Werff, L B 2010, Story Segmentation for Speech Transcripts in Sparse Data Conditions. in Proceedings of the 2010 ACM International Workshop on Searching Spontaneous Conversational Speech, SSCS '10. ACM Multimedia, Association for Computing Machinery, New York, pp. 33-38, ACM/SIGIR International Workshop on Searching Spontaneous Conversational Speech, SSCS 2010, Florence, Italy, 29/10/10. https://doi.org/10.1145/1878101.1878109","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:ris.utwente.nl:publications/c48d8485-f2b1-48dd-a72d-c83d3f13a406","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922991","display_name":"University of Twente Research Information","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1205460133","https://openalex.org/W1522951677","https://openalex.org/W1548080502","https://openalex.org/W1557074680","https://openalex.org/W1606942729","https://openalex.org/W1710422233","https://openalex.org/W1828401780","https://openalex.org/W2016243284","https://openalex.org/W2038721957","https://openalex.org/W2100873065","https://openalex.org/W2100935296","https://openalex.org/W2131133093","https://openalex.org/W2153252192","https://openalex.org/W2487523968","https://openalex.org/W2949496004","https://openalex.org/W2997961850","https://openalex.org/W4285719527","https://openalex.org/W4290864872"],"related_works":["https://openalex.org/W2592395359","https://openalex.org/W2535231171","https://openalex.org/W2045342254","https://openalex.org/W1501331687","https://openalex.org/W2326647871","https://openalex.org/W4205247302","https://openalex.org/W2468652214","https://openalex.org/W2501551404","https://openalex.org/W1504527458","https://openalex.org/W2130144716"],"abstract_inverted_index":{"Information":[0],"Retrieval":[1],"systems":[2],"determine":[3],"relevance":[4],"by":[5,25,47],"comparing":[6],"information":[7,160,220],"needs":[8],"with":[9],"the":[10,57,72,93,131,138,146,162,180,185,206,210],"content":[11,58],"of":[12,38,61,71,95,112,121,133,137,212],"potential":[13],"retrieval":[14,32,186],"units.":[15],"Unlike":[16],"most":[17,70],"textual":[18],"data,":[19],"automatically":[20,48,213],"generated":[21],"speech":[22,62,113,215],"transcripts":[23,216],"cannot":[24],"default":[26],"be":[27,45,231],"easily":[28],"divided":[29],"into":[30],"obvious":[31],"units":[33],"due":[34,81],"to":[35,82,173,195],"a":[36,101,153,170,189,196,225],"lack":[37,94],"explicit":[39],"structural":[40],"markers.":[41],"This":[42],"problem":[43],"can":[44,99,230],"addressed":[46],"detecting":[49],"topically":[50],"cohesive":[51],"segments,":[52],"or":[53],"stories.":[54],"However,":[55],"when":[56],"collection":[59,125],"consists":[60],"from":[63,124],"less":[64],"formal":[65],"domains":[66],"than":[67],"broadcast":[68],"news,":[69],"standard":[73],"automatic":[74,110],"boundary":[75],"detection":[76],"methods":[77,108,140],"are":[78,115,118,141,148],"potentially":[79],"unsuitable":[80],"their":[83,122],"reliance":[84],"on":[85,143,152,169],"learned":[86],"features.":[87],"In":[88,104],"particular":[89],"for":[90,109,188,217,233],"conversational":[91],"speech,":[92],"adequate":[96],"training":[97,134],"data":[98,236],"present":[100],"significant":[102],"issue.":[103],"this":[105],"paper":[106],"four":[107,139],"segmentation":[111,155],"transcriptions":[114],"compared.":[116],"These":[117],"selected":[119],"because":[120],"independence":[123],"specific":[126],"knowledge":[127],"and":[128,164,199],"implemented":[129],"without":[130],"use":[132,218],"data.":[135],"Two":[136],"based":[142,151],"existing":[144],"algorithms,":[145],"others":[147],"novel":[149],"approaches":[150],"dynamic":[154],"algorithm":[156],"(QDSA)":[157],"that":[158,224],"incorporates":[159],"about":[161],"query,":[163],"WordNet.":[165],"Experiments":[166],"were":[167,193],"done":[168],"task":[171,211],"similar":[172],"TREC":[174],"SDR":[175],"unknown":[176],"boundaries":[177],"condition.":[178],"For":[179,209],"best":[181],"performing":[182],"system,":[183],"QDSA,":[184],"scores":[187],"tfidf-type":[190],"ranking":[191],"function":[192],"equivalent":[194],"reference":[197],"segmentation,":[198],"improved":[200],"through":[201],"document":[202],"length":[203],"normalization":[204],"using":[205],"bm25/Okapi":[207],"method.":[208],"segmenting":[214],"in":[219],"retrieval,":[221],"we":[222],"conclude":[223],"training-poor":[226],"processing":[227],"paradigm":[228],"which":[229],"crucial":[232],"handling":[234],"surprise":[235],"is":[237],"feasible.":[238]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
