{"id":"https://openalex.org/W2963033440","doi":"https://doi.org/10.18653/v1/k15-1009","title":"Big Data Small Data, In Domain Out-of Domain, Known Word Unknown Word: The Impact of Word Representations on Sequence Labelling Tasks","display_name":"Big Data Small Data, In Domain Out-of Domain, Known Word Unknown Word: The Impact of Word Representations on Sequence Labelling Tasks","publication_year":2015,"publication_date":"2015-01-01","ids":{"openalex":"https://openalex.org/W2963033440","doi":"https://doi.org/10.18653/v1/k15-1009","mag":"2963033440"},"language":"en","primary_location":{"id":"doi:10.18653/v1/k15-1009","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k15-1009","pdf_url":"https://www.aclweb.org/anthology/K15-1009.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Nineteenth Conference on Computational Natural Language Learning","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/K15-1009.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008486397","display_name":"Lizhen Qu","orcid":"https://orcid.org/0000-0002-7764-431X"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Lizhen Qu","raw_affiliation_strings":["NICTA, Australia","The Australian National University"],"affiliations":[{"raw_affiliation_string":"NICTA, Australia","institution_ids":["https://openalex.org/I42894916"]},{"raw_affiliation_string":"The Australian National University","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070175021","display_name":"Gabriela Ferraro","orcid":"https://orcid.org/0000-0003-3652-9689"},"institutions":[{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Gabriela Ferraro","raw_affiliation_strings":["NICTA, Australia","The Australian National University"],"affiliations":[{"raw_affiliation_string":"NICTA, Australia","institution_ids":["https://openalex.org/I42894916"]},{"raw_affiliation_string":"The Australian National University","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101622399","display_name":"Liyuan Zhou","orcid":"https://orcid.org/0000-0001-9046-6098"},"institutions":[{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Liyuan Zhou","raw_affiliation_strings":["NICTA, Australia"],"affiliations":[{"raw_affiliation_string":"NICTA, Australia","institution_ids":["https://openalex.org/I42894916"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103013344","display_name":"Weiwei Hou","orcid":"https://orcid.org/0000-0002-0758-2870"},"institutions":[{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Weiwei Hou","raw_affiliation_strings":["NICTA, Australia"],"affiliations":[{"raw_affiliation_string":"NICTA, Australia","institution_ids":["https://openalex.org/I42894916"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014008069","display_name":"Nathan Schneider","orcid":"https://orcid.org/0000-0002-2745-0533"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Nathan Schneider","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103085805","display_name":"Timothy Baldwin","orcid":"https://orcid.org/0000-0002-4445-1386"},"institutions":[{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Timothy Baldwin","raw_affiliation_strings":["NICTA, Australia","The University of Melbourne"],"affiliations":[{"raw_affiliation_string":"NICTA, Australia","institution_ids":["https://openalex.org/I42894916"]},{"raw_affiliation_string":"The University of Melbourne","institution_ids":["https://openalex.org/I165779595"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5008486397"],"corresponding_institution_ids":["https://openalex.org/I118347636","https://openalex.org/I42894916"],"apc_list":null,"apc_paid":null,"fwci":4.4483,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.95125828,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"83","last_page":"93"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8383185863494873},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.767263650894165},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.756721019744873},{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.7206313610076904},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6848334074020386},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.6386911273002625},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5566377639770508},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5275428295135498},{"id":"https://openalex.org/keywords/sequence-labeling","display_name":"Sequence labeling","score":0.5024700164794922},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4588407278060913},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4490512013435364},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.43036454916000366},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.40413397550582886},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.34172582626342773},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.14749372005462646},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08806723356246948}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8383185863494873},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.767263650894165},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.756721019744873},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.7206313610076904},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6848334074020386},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.6386911273002625},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5566377639770508},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5275428295135498},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.5024700164794922},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4588407278060913},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4490512013435364},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.43036454916000366},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.40413397550582886},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.34172582626342773},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.14749372005462646},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08806723356246948},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/k15-1009","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k15-1009","pdf_url":"https://www.aclweb.org/anthology/K15-1009.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Nineteenth Conference on Computational Natural Language Learning","raw_type":"proceedings-article"},{"id":"pmh:oai:openresearch-repository.anu.edu.au:1885/270445","is_oa":true,"landing_page_url":"http://hdl.handle.net/1885/270445","pdf_url":null,"source":{"id":"https://openalex.org/S4306402539","display_name":"ANU Open Research (Australian National University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I118347636","host_organization_name":"Australian National University","host_organization_lineage":["https://openalex.org/I118347636"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Big Data Small Data, In Domain Out-of Domain, Known Word Unknown Word: The Impact of Word Representations on Sequence Labelling Tasks","raw_type":"Conference paper"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/87d9d9c9-e163-4c2a-aafd-9056a68d19ed","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/87d9d9c9-e163-4c2a-aafd-9056a68d19ed","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Qu, L, Ferraro, G, Zhou, L, Hou, W, Schneider, N & Baldwin, T 2015, Big Data Small Data, In Domain Out-of Domain, Known Word Unknown Word: The Impact of Word Representations on Sequence Labelling Tasks. in Proceedings of the Nineteenth Conference on Computational Natural Language Learning. Beijing, China, pp. 83-93. < http://www.aclweb.org/anthology/K15-1009 >","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/87d9d9c9-e163-4c2a-aafd-9056a68d19ed","is_oa":false,"landing_page_url":"http://hdl.handle.net/20.500.11820/87d9d9c9-e163-4c2a-aafd-9056a68d19ed","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"doi:10.18653/v1/k15-1009","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k15-1009","pdf_url":"https://www.aclweb.org/anthology/K15-1009.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Nineteenth Conference on Computational Natural Language Learning","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8299999833106995,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2963033440.pdf","grobid_xml":"https://content.openalex.org/works/W2963033440.grobid-xml"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W141372029","https://openalex.org/W168564468","https://openalex.org/W371426616","https://openalex.org/W638675188","https://openalex.org/W1614298861","https://openalex.org/W1615991656","https://openalex.org/W1632114991","https://openalex.org/W1662133657","https://openalex.org/W1834149421","https://openalex.org/W1836521361","https://openalex.org/W1880262756","https://openalex.org/W1903393809","https://openalex.org/W1981617416","https://openalex.org/W2091273188","https://openalex.org/W2097998348","https://openalex.org/W2098921539","https://openalex.org/W2107611725","https://openalex.org/W2115847145","https://openalex.org/W2117130368","https://openalex.org/W2121227244","https://openalex.org/W2122922389","https://openalex.org/W2125031621","https://openalex.org/W2127314673","https://openalex.org/W2128634885","https://openalex.org/W2130903752","https://openalex.org/W2144578941","https://openalex.org/W2146502635","https://openalex.org/W2147880316","https://openalex.org/W2153579005","https://openalex.org/W2156515921","https://openalex.org/W2157807817","https://openalex.org/W2158139315","https://openalex.org/W2158899491","https://openalex.org/W2168596788","https://openalex.org/W2187089797","https://openalex.org/W2224490803","https://openalex.org/W2250539671","https://openalex.org/W2250861254","https://openalex.org/W2251291469","https://openalex.org/W2251703179","https://openalex.org/W2251803266","https://openalex.org/W2251874715","https://openalex.org/W2252238553","https://openalex.org/W2611669587","https://openalex.org/W2951299559","https://openalex.org/W2951714314","https://openalex.org/W2952087486","https://openalex.org/W2952230511","https://openalex.org/W4294170691","https://openalex.org/W4301691548"],"related_works":["https://openalex.org/W4302441680","https://openalex.org/W2170495237","https://openalex.org/W2609130030","https://openalex.org/W2963706742","https://openalex.org/W2952592458","https://openalex.org/W4289701088","https://openalex.org/W2886790750","https://openalex.org/W2020272874","https://openalex.org/W4286432911","https://openalex.org/W2252069213"],"abstract_inverted_index":{"Word":[0],"embeddings":[1,80,99,137],"-distributed":[2],"word":[3,36,72,79,98,124,136],"representations":[4],"that":[5,76,97,128],"can":[6],"be":[7],"learned":[8],"from":[9],"unlabelled":[10],"data":[11],"-have":[12],"been":[13],"shown":[14],"to":[15,92,101],"have":[16],"high":[17],"utility":[18],"in":[19,39],"many":[20],"natural":[21],"language":[22],"processing":[23],"applications.":[24],"In":[25],"this":[26],"paper,":[27],"we":[28,141],"perform":[29],"an":[30],"extrinsic":[31],"evaluation":[32],"of":[33,42,61,68,71,109],"four":[34,43],"popular":[35],"embedding":[37,125],"methods":[38],"the":[40,62,66,122],"context":[41],"sequence":[44],"labelling":[45],"tasks:":[46],"part-of-speech":[47],"tagging,":[48],"syntactic":[49],"chunking,":[50],"named":[51],"entity":[52],"recognition,":[53],"and":[54,96,106,127],"multiword":[55],"expression":[56],"identification.":[57],"A":[58],"particular":[59],"focus":[60],"paper":[63],"is":[64,118],"analysing":[65],"effects":[67],"task-based":[69],"updating":[70],"representations.":[73],"We":[74],"show":[75],"when":[77],"using":[78],"as":[81,83,85],"features,":[82],"few":[84],"several":[86],"hundred":[87],"training":[88],"instances":[89],"are":[90,132],"sufficient":[91],"achieve":[93],"competitive":[94,134],"results,":[95],"lead":[100],"improvements":[102],"over":[103],"out-of-vocabulary":[104],"words":[105],"also":[107],"out":[108],"domain.":[110],"Perhaps":[111],"more":[112],"surprisingly,":[113],"our":[114],"results":[115],"indicate":[116],"there":[117],"little":[119],"difference":[120],"between":[121],"different":[123],"methods,":[126],"simple":[129],"Brown":[130],"clusters":[131],"often":[133],"with":[135],"across":[138],"all":[139],"tasks":[140],"consider.":[142]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
