{"id":"https://openalex.org/W2251501723","doi":"https://doi.org/10.18653/v1/w15-3102","title":"Create a Manual Chinese Word Segmentation Dataset Using Crowdsourcing Method","display_name":"Create a Manual Chinese Word Segmentation Dataset Using Crowdsourcing Method","publication_year":2015,"publication_date":"2015-01-01","ids":{"openalex":"https://openalex.org/W2251501723","doi":"https://doi.org/10.18653/v1/w15-3102","mag":"2251501723"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w15-3102","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w15-3102","pdf_url":"https://www.aclweb.org/anthology/W15-3102.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Eighth SIGHAN Workshop on Chinese Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W15-3102.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102971767","display_name":"Shichang Wang","orcid":"https://orcid.org/0000-0002-7633-8118"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Shichang Wang","raw_affiliation_strings":["Department of Chinese and Bilingual Studies The Hong Kong Polytechnic University Hung Hom, Kowloon, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Chinese and Bilingual Studies The Hong Kong Polytechnic University Hung Hom, Kowloon, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024924150","display_name":"Chu\u2010Ren Huang","orcid":"https://orcid.org/0000-0002-8526-5520"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Chu-Ren Huang","raw_affiliation_strings":["Department of Chinese and Bilingual Studies The Hong Kong Polytechnic University Hung Hom, Kowloon, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Chinese and Bilingual Studies The Hong Kong Polytechnic University Hung Hom, Kowloon, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100389413","display_name":"Yao Yao","orcid":"https://orcid.org/0000-0001-8825-2680"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yao Yao","raw_affiliation_strings":["Department of Chinese and Bilingual Studies The Hong Kong Polytechnic University Hung Hom, Kowloon, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Chinese and Bilingual Studies The Hong Kong Polytechnic University Hung Hom, Kowloon, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017602070","display_name":"Angel Chan","orcid":"https://orcid.org/0000-0002-9547-0210"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Angel Chan","raw_affiliation_strings":["Department of Chinese and Bilingual Studies The Hong Kong Polytechnic University Hung Hom, Kowloon, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Chinese and Bilingual Studies The Hong Kong Polytechnic University Hung Hom, Kowloon, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102971767"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12525419,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.9658836126327515},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7897772789001465},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7169246077537537},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7120155692100525},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.6389085054397583},{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.6016621589660645},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.597216784954071},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5753806233406067},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4892549514770508},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4526394307613373},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1369529366493225},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1261546015739441}],"concepts":[{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.9658836126327515},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7897772789001465},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7169246077537537},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7120155692100525},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.6389085054397583},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.6016621589660645},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.597216784954071},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5753806233406067},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4892549514770508},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4526394307613373},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1369529366493225},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1261546015739441},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/w15-3102","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w15-3102","pdf_url":"https://www.aclweb.org/anthology/W15-3102.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Eighth SIGHAN Workshop on Chinese Language Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:ira.lib.polyu.edu.hk:10397/68049","is_oa":false,"landing_page_url":"http://hdl.handle.net/10397/68049","pdf_url":null,"source":{"id":"https://openalex.org/S4306400205","display_name":"PolyU Institutional Research Archive (Hong Kong Polytechnic University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I14243506","host_organization_name":"Hong Kong Polytechnic University","host_organization_lineage":["https://openalex.org/I14243506"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":{"id":"doi:10.18653/v1/w15-3102","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w15-3102","pdf_url":"https://www.aclweb.org/anthology/W15-3102.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Eighth SIGHAN Workshop on Chinese Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5799999833106995}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2251501723.pdf","grobid_xml":"https://content.openalex.org/works/W2251501723.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W61584101","https://openalex.org/W1648664160","https://openalex.org/W1905966190","https://openalex.org/W1970381522","https://openalex.org/W1976965564","https://openalex.org/W1979557360","https://openalex.org/W2031918681","https://openalex.org/W2033145080","https://openalex.org/W2051991014","https://openalex.org/W2058738785","https://openalex.org/W2072377095","https://openalex.org/W2080211902","https://openalex.org/W2086676398","https://openalex.org/W2098865355","https://openalex.org/W2106568252","https://openalex.org/W2127849236","https://openalex.org/W2141708418","https://openalex.org/W2250561386","https://openalex.org/W2250575333","https://openalex.org/W3122078363","https://openalex.org/W3123895079","https://openalex.org/W3200634688","https://openalex.org/W3216014454","https://openalex.org/W4243469581","https://openalex.org/W4248915016"],"related_works":["https://openalex.org/W3032998312","https://openalex.org/W2393940967","https://openalex.org/W2385598138","https://openalex.org/W2346578824","https://openalex.org/W2366925922","https://openalex.org/W2159591557","https://openalex.org/W2115592387","https://openalex.org/W2905950556","https://openalex.org/W2112534334","https://openalex.org/W120168696"],"abstract_inverted_index":{"The":[0],"manual":[1,22,81],"Chinese":[2,28,117,162,183,186],"word":[3,23,118,180],"segmentation":[4,24,45,82,119],"dataset":[5,96,157],"WordSegCHC":[6],"1.0":[7],"which":[8,103],"was":[9,67],"built":[10],"by":[11,69],"eight":[12],"crowdsourcing":[13,50,114,128,141],"tasks":[14,51],"conducted":[15],"on":[16],"the":[17,21,41,53,89,91,95,113,122,127,138,155,166,177],"Crowdflower":[18],"platform":[19],"contains":[20],"data":[25,106,136,143],"of":[26,55,58,94,140,168,179,182],"152":[27],"sentences":[29,42],"whose":[30],"length":[31],"ranges":[32],"from":[33,61],"20":[34],"to":[35,63,87,99,116,165],"46":[36],"characters":[37],"without":[38],"punctuations.":[39],"All":[40],"received":[43],"200":[44],"responses":[46],"in":[47,146,152,161,176],"their":[48],"corresponding":[49],"and":[52,121,171,185],"numbers":[54],"valid":[56],"response":[57],"them":[59],"range":[60],"123":[62],"143":[64],"(each":[65],"sentence":[66],"segmented":[68],"more":[70],"than":[71],"120":[72],"subjects).":[73],"We":[74],"also":[75],"proposed":[76],"an":[77],"evaluation":[78],"method":[79,115,129],"called":[80],"error":[83],"rate":[84],"(M":[85],"SER)":[86],"evaluate":[88],"dataset;":[90],"M":[92],"SER":[93],"is":[97,130],"proved":[98],"be":[100,150],"very":[101],"low":[102],"indicates":[104],"reliable":[105],"quality.":[107],"In":[108],"this":[109,147],"work,":[110],"we":[111],"applied":[112],"task":[120],"results":[123],"confirmed":[124],"again":[125],"that":[126],"a":[131,159],"promising":[132],"tool":[133],"for":[134],"linguistic":[135,142],"collection;":[137],"framework":[139],"collection":[144],"used":[145],"work":[148],"can":[149],"reused":[151],"similar":[153],"tasks;":[154],"resultant":[156],"filled":[158],"gap":[160],"language":[163,187],"resources":[164],"best":[167],"our":[169],"knowledge,":[170],"it":[172],"has":[173],"potential":[174],"applications":[175],"research":[178],"intuition":[181],"speakers":[184],"processing.":[188]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
