{"id":"https://openalex.org/W2217181929","doi":"https://doi.org/10.1109/icsda.2015.7357891","title":"Automatic word segmentation for spoken Cantonese","display_name":"Automatic word segmentation for spoken Cantonese","publication_year":2015,"publication_date":"2015-10-01","ids":{"openalex":"https://openalex.org/W2217181929","doi":"https://doi.org/10.1109/icsda.2015.7357891","mag":"2217181929"},"language":"en","primary_location":{"id":"doi:10.1109/icsda.2015.7357891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icsda.2015.7357891","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 International Conference Oriental COCOSDA held jointly with 2015 Conference on Asian Spoken Language Research and Evaluation (O-COCOSDA/CASLRE)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102854471","display_name":"Roxana Fung","orcid":"https://orcid.org/0000-0003-3678-2174"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Roxana Fung","raw_affiliation_strings":["Department of Chinese and Bilingual Studies, The Hong Kong Polytechnic University, Hung Hom, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Chinese and Bilingual Studies, The Hong Kong Polytechnic University, Hung Hom, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041939827","display_name":"Brigitte Bigi","orcid":"https://orcid.org/0000-0003-1834-6918"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210166510","display_name":"Laboratoire Parole et Langage","ror":"https://ror.org/05whq8x35","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I21491767","https://openalex.org/I4210150854","https://openalex.org/I4210166510"]},{"id":"https://openalex.org/I21491767","display_name":"Aix-Marseille Universit\u00e9","ror":"https://ror.org/035xkbk20","country_code":"FR","type":"education","lineage":["https://openalex.org/I21491767"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Brigitte Bigi","raw_affiliation_strings":["Laboratoire Parole et Langage CNRS, Aix-Marseille Universit\u00e9, Aix-en-Provence, France"],"affiliations":[{"raw_affiliation_string":"Laboratoire Parole et Langage CNRS, Aix-Marseille Universit\u00e9, Aix-en-Provence, France","institution_ids":["https://openalex.org/I4210166510","https://openalex.org/I1294671590","https://openalex.org/I21491767"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5102854471"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":0.94290274,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.88098539,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"196","last_page":"201"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7597168684005737},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6164675951004028},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6037265062332153},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5780186057090759},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.5746882557868958},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5246479511260986},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.42047828435897827},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3323330581188202}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7597168684005737},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6164675951004028},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6037265062332153},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5780186057090759},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.5746882557868958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5246479511260986},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.42047828435897827},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3323330581188202},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/icsda.2015.7357891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icsda.2015.7357891","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 International Conference Oriental COCOSDA held jointly with 2015 Conference on Asian Spoken Language Research and Evaluation (O-COCOSDA/CASLRE)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-01455312v1","is_oa":false,"landing_page_url":"https://hal.science/hal-01455312","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Oriental COCOSDA and Conference on Asian Spoken Language Research and Evaluation (O-COCOSDA/CASLRE), 2015, Shanghai, Switzerland. pp.196--201, &#x27E8;10.1109/ICSDA.2015.7357891&#x27E9;","raw_type":"Conference papers"},{"id":"pmh:oai:ira.lib.polyu.edu.hk:10397/65194","is_oa":false,"landing_page_url":"http://hdl.handle.net/10397/65194","pdf_url":null,"source":{"id":"https://openalex.org/S4306400205","display_name":"PolyU Institutional Research Archive (Hong Kong Polytechnic University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I14243506","host_organization_name":"Hong Kong Polytechnic University","host_organization_lineage":["https://openalex.org/I14243506"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6499999761581421,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320332709","display_name":"President's Council of Cornell Women","ror":"https://ror.org/05bnh6r87"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1412233304","https://openalex.org/W1502958118","https://openalex.org/W1558333962","https://openalex.org/W1847536016","https://openalex.org/W1986585666","https://openalex.org/W2077696427","https://openalex.org/W2098594428","https://openalex.org/W2101857472","https://openalex.org/W2129227538","https://openalex.org/W2129608356","https://openalex.org/W2163377725","https://openalex.org/W2251387359","https://openalex.org/W2251701780","https://openalex.org/W2544930972","https://openalex.org/W2760736243","https://openalex.org/W3145501851","https://openalex.org/W3216401400","https://openalex.org/W4254353051","https://openalex.org/W4300416988","https://openalex.org/W6633374533","https://openalex.org/W6638932672","https://openalex.org/W6675074630","https://openalex.org/W6675095910","https://openalex.org/W6679251009","https://openalex.org/W6683955732","https://openalex.org/W6691416770","https://openalex.org/W6744840084","https://openalex.org/W6843615328"],"related_works":["https://openalex.org/W2251446894","https://openalex.org/W2172629291","https://openalex.org/W2380773642","https://openalex.org/W2384559435","https://openalex.org/W1597333669","https://openalex.org/W2337707338","https://openalex.org/W2393940967","https://openalex.org/W2159591557","https://openalex.org/W2785359773","https://openalex.org/W2058548953"],"abstract_inverted_index":{"Though":[0],"Cantonese":[1,19,63,114,143],"is":[2,35,71,87],"the":[3,26,44,55,75,99,105,120,134,139],"most":[4],"influential":[5],"variety":[6],"of":[7,18,29,46,59,74,98,107,111,141],"Chinese":[8],"other":[9],"than":[10],"Mandarin,":[11],"there":[12],"are":[13],"only":[14],"a":[15,31,36,72,88,93,112],"limited":[16],"number":[17],"corpora":[20,144],"available":[21],"for":[22,67,145],"linguistic":[23],"studies.":[24,148],"Among":[25],"essential":[27],"steps":[28],"building":[30],"corpus,":[32],"word":[33,48,64],"segmentation":[34],"necessary":[37],"but":[38],"highly":[39],"challenging":[40],"task":[41],"due":[42],"to":[43,80],"lack":[45],"clear":[47],"boundary":[49],"in":[50,129],"Cantonese.":[51,68],"This":[52],"paper":[53],"reports":[54],"construction":[56],"and":[57,117,125],"evaluation":[58],"an":[60],"open-source":[61],"automatic":[62],"segmenter":[65],"developed":[66],"The":[69,96],"tool":[70,100,121,135],"component":[73],"multilingual":[76],"SPPAS":[77],"program":[78],"designed":[79],"be":[81],"used":[82],"directly":[83],"by":[84,103],"linguists.":[85],"It":[86],"free":[89],"software":[90],"distributed":[91],"under":[92],"GPL":[94],"license.":[95],"effectiveness":[97],"was":[101],"evaluated":[102],"comparing":[104],"result":[106],"segmenting":[108],"some":[109],"samples":[110],"spoken":[113],"corpus":[115],"manually":[116],"automatically":[118],"using":[119],"developed.":[122],"High":[123],"precision":[124],"recall":[126],"were":[127],"found":[128],"our":[130],"study.":[131],"Upon":[132],"completion,":[133],"would":[136],"definitely":[137],"promote":[138],"development":[140],"more":[142],"language":[146],"related":[147]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
