{"id":"https://openalex.org/W2184055828","doi":"https://doi.org/10.1162/tacl_a_00174","title":"Segmentation for Efficient Supervised Language Annotation with an Explicit Cost-Utility Tradeoff","display_name":"Segmentation for Efficient Supervised Language Annotation with an Explicit Cost-Utility Tradeoff","publication_year":2014,"publication_date":"2014-12-01","ids":{"openalex":"https://openalex.org/W2184055828","doi":"https://doi.org/10.1162/tacl_a_00174","mag":"2184055828"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00174","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00174","pdf_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/tacl_a_00174","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/tacl_a_00174","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024736091","display_name":"Matthias Sperber","orcid":null},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Matthias Sperber","raw_affiliation_strings":["Karlsruhe Institute of Technology, Institute for Anthropomatics, Germany,"],"affiliations":[{"raw_affiliation_string":"Karlsruhe Institute of Technology, Institute for Anthropomatics, Germany,","institution_ids":["https://openalex.org/I102335020"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083547986","display_name":"Mirjam Simantzik","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mirjam Simantzik","raw_affiliation_strings":["Mobile Technologies GmbH, Germany,"],"affiliations":[{"raw_affiliation_string":"Mobile Technologies GmbH, Germany,","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068811427","display_name":"Graham Neubig","orcid":"https://orcid.org/0000-0002-2072-3789"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Graham Neubig","raw_affiliation_strings":["Nara Institute of Science and Technology, AHC Laboratory, Japan,"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology, AHC Laboratory, Japan,","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020994673","display_name":"Satoshi Nakamura","orcid":"https://orcid.org/0000-0001-6956-3803"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Satoshi Nakamura","raw_affiliation_strings":["Nara Institute of Science and Technology, AHC Laboratory, Japan,"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology, AHC Laboratory, Japan,","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110453805","display_name":"Alex Waibel","orcid":null},"institutions":[{"id":"https://openalex.org/I102335020","display_name":"Karlsruhe Institute of Technology","ror":"https://ror.org/04t3en479","country_code":"DE","type":"education","lineage":["https://openalex.org/I102335020","https://openalex.org/I1305996414"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Alex Waibel","raw_affiliation_strings":["Karlsruhe Institute of Technology, Institute for Anthropomatics, Germany,"],"affiliations":[{"raw_affiliation_string":"Karlsruhe Institute of Technology, Institute for Anthropomatics, Germany,","institution_ids":["https://openalex.org/I102335020"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5020994673","https://openalex.org/A5024736091","https://openalex.org/A5068811427","https://openalex.org/A5083547986","https://openalex.org/A5110453805"],"corresponding_institution_ids":["https://openalex.org/I102335020","https://openalex.org/I75917431"],"apc_list":null,"apc_paid":null,"fwci":2.1196,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.90097618,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"2","issue":null,"first_page":"169","last_page":"180"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9183022975921631},{"id":"https://openalex.org/keywords/market-segmentation","display_name":"Market segmentation","score":0.7608761191368103},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.750590980052948},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5695075988769531},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5689309239387512},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5475841164588928},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5256132483482361},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.486287921667099},{"id":"https://openalex.org/keywords/cognitive-load","display_name":"Cognitive load","score":0.44180023670196533},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4223769009113312},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.2573947012424469}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9183022975921631},{"id":"https://openalex.org/C125308379","wikidata":"https://www.wikidata.org/wiki/Q363057","display_name":"Market segmentation","level":2,"score":0.7608761191368103},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.750590980052948},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5695075988769531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5689309239387512},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5475841164588928},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5256132483482361},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.486287921667099},{"id":"https://openalex.org/C61641136","wikidata":"https://www.wikidata.org/wiki/Q1107019","display_name":"Cognitive load","level":3,"score":0.44180023670196533},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4223769009113312},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.2573947012424469},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/tacl_a_00174","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00174","pdf_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/tacl_a_00174","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:6e8fae6669ab410288964cfbda7e8a89","is_oa":true,"landing_page_url":"https://doaj.org/article/6e8fae6669ab410288964cfbda7e8a89","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Transactions of the Association for Computational Linguistics, Vol 2 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00174","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00174","pdf_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/tacl_a_00174","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3829048491","display_name":null,"funder_award_id":"FP7/2007-2013","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G3911098002","display_name":"Bridges Across the Language Divide","funder_award_id":"287658","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G3981126665","display_name":null,"funder_award_id":"Seventh Framework Programme","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5593277320","display_name":null,"funder_award_id":"2007-2013","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5702163051","display_name":null,"funder_award_id":"FP7/2007","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2184055828.pdf","grobid_xml":"https://content.openalex.org/works/W2184055828.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W7341375","https://openalex.org/W58109815","https://openalex.org/W311217234","https://openalex.org/W1601649239","https://openalex.org/W1618905105","https://openalex.org/W1746819321","https://openalex.org/W1997188340","https://openalex.org/W2013025416","https://openalex.org/W2050810864","https://openalex.org/W2067760738","https://openalex.org/W2086168649","https://openalex.org/W2090179842","https://openalex.org/W2092840718","https://openalex.org/W2096507791","https://openalex.org/W2102162869","https://openalex.org/W2105410942","https://openalex.org/W2105677720","https://openalex.org/W2108351678","https://openalex.org/W2117898394","https://openalex.org/W2125094180","https://openalex.org/W2127863960","https://openalex.org/W2134756243","https://openalex.org/W2138517257","https://openalex.org/W2165188277","https://openalex.org/W2171671120","https://openalex.org/W2183100634","https://openalex.org/W2186839874","https://openalex.org/W2251311344","https://openalex.org/W2398009384","https://openalex.org/W2402417620","https://openalex.org/W4231912967"],"related_works":["https://openalex.org/W2592395359","https://openalex.org/W2045342254","https://openalex.org/W2535231171","https://openalex.org/W1501331687","https://openalex.org/W4255512592","https://openalex.org/W4205247302","https://openalex.org/W2501551404","https://openalex.org/W2326647871","https://openalex.org/W2468652214","https://openalex.org/W1504527458"],"abstract_inverted_index":{"In":[0,100],"this":[1],"paper,":[2],"we":[3],"study":[4,148],"the":[5,41,69,79,105,116,128],"problem":[6,141],"of":[7,12,71,98,132],"manually":[8],"correcting":[9],"automatic":[10],"annotations":[11],"natural":[13,158],"language":[14,159],"in":[15],"as":[16,20],"efficient":[17],"a":[18,24,29,96,138],"manner":[19],"possible.":[21],"We":[22],"introduce":[23],"method":[25,113],"for":[26,51,59],"automatically":[27],"segmenting":[28],"corpus":[30],"into":[31,40],"chunks":[32],"such":[33],"that":[34,75,118],"many":[35],"uncertain":[36],"labels":[37,74,86,94],"are":[38,76,87],"grouped":[39],"same":[42],"chunk,":[43],"while":[44],"human":[45],"supervision":[46,120],"can":[47],"be":[48,57],"omitted":[49],"altogether":[50],"other":[52],"segments.":[53],"A":[54,146],"tradeoff":[55],"must":[56],"found":[58],"segment":[60,135],"sizes.":[61],"Choosing":[62],"short":[63],"segments":[64,103],"allows":[65],"us":[66],"to":[67,109,126],"reduce":[68,104],"number":[70],"highly":[72],"confident":[73],"supervised":[77],"by":[78,122],"annotator,":[80],"which":[81],"is":[82,95],"useful":[83],"because":[84],"these":[85,143],"often":[88],"already":[89],"correct":[90,93],"and":[91,130,136,164],"supervising":[92,133],"waste":[97],"effort.":[99],"contrast,":[101],"long":[102],"cognitive":[106],"effort":[107],"due":[108],"context":[110],"switches.":[111],"Our":[112],"helps":[114],"find":[115],"segmentation":[117],"optimizes":[119],"efficiency":[121],"defining":[123],"user":[124,147],"models":[125],"predict":[127],"cost":[129],"utility":[131],"each":[134],"solving":[137],"constrained":[139],"optimization":[140],"balancing":[142],"contradictory":[144],"objectives.":[145],"demonstrates":[149],"noticeable":[150],"gains":[151],"over":[152],"pre-segmented,":[153],"confidence-ordered":[154],"baselines":[155],"on":[156],"two":[157],"processing":[160],"tasks:":[161],"speech":[162],"transcription":[163],"word":[165],"segmentation.":[166]},"counts_by_year":[{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2014,"cited_by_count":1}],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
