{"id":"https://openalex.org/W2060035243","doi":"https://doi.org/10.1145/1781134.1781136","title":"A Linguistically Inspired Statistical Model for Chinese Punctuation Generation","display_name":"A Linguistically Inspired Statistical Model for Chinese Punctuation Generation","publication_year":2010,"publication_date":"2010-06-01","ids":{"openalex":"https://openalex.org/W2060035243","doi":"https://doi.org/10.1145/1781134.1781136","mag":"2060035243"},"language":"en","primary_location":{"id":"doi:10.1145/1781134.1781136","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1781134.1781136","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100528520","display_name":"Yuqing Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I1292669757","display_name":"Toshiba (Japan)","ror":"https://ror.org/0326v3z14","country_code":"JP","type":"company","lineage":["https://openalex.org/I1292669757"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yuqing Guo","raw_affiliation_strings":["Toshiba (China) Research and Development Center","Toshiba (China) Research and Development Center#TAB#"],"affiliations":[{"raw_affiliation_string":"Toshiba (China) Research and Development Center","institution_ids":[]},{"raw_affiliation_string":"Toshiba (China) Research and Development Center#TAB#","institution_ids":["https://openalex.org/I1292669757"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100386394","display_name":"Haifeng Wang","orcid":"https://orcid.org/0000-0002-0672-7468"},"institutions":[{"id":"https://openalex.org/I1292669757","display_name":"Toshiba (Japan)","ror":"https://ror.org/0326v3z14","country_code":"JP","type":"company","lineage":["https://openalex.org/I1292669757"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Haifeng Wang","raw_affiliation_strings":["Toshiba (China) Research and Development Center","Toshiba (China) Research and Development Center#TAB#"],"affiliations":[{"raw_affiliation_string":"Toshiba (China) Research and Development Center","institution_ids":[]},{"raw_affiliation_string":"Toshiba (China) Research and Development Center#TAB#","institution_ids":["https://openalex.org/I1292669757"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049194403","display_name":"Josef van Genabith","orcid":"https://orcid.org/0000-0003-1322-7944"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Josef van Genabith","raw_affiliation_strings":["Dublin City University","Dublin City University#TAB#"],"affiliations":[{"raw_affiliation_string":"Dublin City University","institution_ids":["https://openalex.org/I42934936"]},{"raw_affiliation_string":"Dublin City University#TAB#","institution_ids":["https://openalex.org/I42934936"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100528520"],"corresponding_institution_ids":["https://openalex.org/I1292669757"],"apc_list":null,"apc_paid":null,"fwci":4.5105,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.94493649,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"9","issue":"2","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/punctuation","display_name":"Punctuation","score":0.9841040372848511},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7628618478775024},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6355836391448975},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.5481054782867432},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5427378416061401},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.501333475112915},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4277503490447998},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.41231685876846313}],"concepts":[{"id":"https://openalex.org/C540372491","wikidata":"https://www.wikidata.org/wiki/Q82622","display_name":"Punctuation","level":2,"score":0.9841040372848511},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7628618478775024},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6355836391448975},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5481054782867432},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5427378416061401},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.501333475112915},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4277503490447998},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41231685876846313}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1781134.1781136","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1781134.1781136","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8199999928474426}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W52045974","https://openalex.org/W73658759","https://openalex.org/W93937563","https://openalex.org/W129752336","https://openalex.org/W132093011","https://openalex.org/W1493882948","https://openalex.org/W1544916875","https://openalex.org/W1550597138","https://openalex.org/W1571903643","https://openalex.org/W1574901103","https://openalex.org/W1587490638","https://openalex.org/W1593045043","https://openalex.org/W1627331591","https://openalex.org/W1773803948","https://openalex.org/W1964823536","https://openalex.org/W1984527503","https://openalex.org/W2003458432","https://openalex.org/W2018560257","https://openalex.org/W2113851376","https://openalex.org/W2118020653","https://openalex.org/W2123132948","https://openalex.org/W2123343069","https://openalex.org/W2131846381","https://openalex.org/W2305592425","https://openalex.org/W3023740747","https://openalex.org/W3037679343","https://openalex.org/W4249866941","https://openalex.org/W4285719527","https://openalex.org/W6632460732"],"related_works":["https://openalex.org/W2936002343","https://openalex.org/W2188883480","https://openalex.org/W1592364192","https://openalex.org/W656840002","https://openalex.org/W1605117403","https://openalex.org/W2381416480","https://openalex.org/W2380599343","https://openalex.org/W2181793145","https://openalex.org/W3163320880","https://openalex.org/W2365703075"],"abstract_inverted_index":{"This":[0],"article":[1],"investigates":[2],"a":[3,16,51,72,78,90,95,147,196,221],"relatively":[4],"underdeveloped":[5],"subject":[6],"in":[7,27,67,182,190,215,218,224],"natural":[8],"language":[9,150],"processing---the":[10],"generation":[11,202],"of":[12,32,113,185,193,200,209],"punctuation":[13,23,33,38,65,87,116,121,156,212],"marks.":[14],"From":[15],"theoretical":[17],"perspective,":[18],"we":[19,76],"study":[20],"16":[21],"Chinese":[22,29,64,103],"marks":[24,39,66,88,213],"as":[25,89],"defined":[26],"the":[28,61,101,106,142,174,183,191,201,210,225],"national":[30],"standard":[31],"usage,":[34],"and":[35,118,130,158,187],"categorize":[36],"these":[37],"into":[40],"three":[41],"different":[42],"types":[43],"according":[44],"to":[45,80,93,207],"their":[46],"syntactic":[47],"properties.":[48],"We":[49,162],"implement":[50],"three-tier":[52],"maximum":[53],"entropy":[54],"model":[55,108,144,151],"incorporating":[56],"linguistically-motivated":[57],"features":[58],"for":[59,115,120,128,132,155,160],"generating":[60],"commonly":[62],"used":[63],"unpunctuated":[68],"sentences":[69],"output":[70,203],"by":[71],"surface":[73],"realizer.":[74],"Furthermore,":[75],"present":[77],"method":[79],"automatically":[81],"extract":[82],"cue":[83],"words":[84],"indicating":[85],"sentence-final":[86],"specialized":[91],"feature":[92],"construct":[94],"more":[96],"precise":[97],"model.":[98],"Evaluating":[99],"on":[100,173],"Penn":[102],"Treebank":[104],"data,":[105],"MaxEnt":[107,143],"achieves":[109],"an":[110],"f":[111,180],"-score":[112],"79.83%":[114],"insertion":[117,129,157,179],"74.61%":[119],"restoration":[122,133,189],"using":[123,134],"gold":[124],"data":[125],"input,":[126],"79.50%":[127],"73.32%":[131],"parser-based":[135],"imperfect":[136],"input.":[137],"The":[138],"experiments":[139],"show":[140,163],"that":[141,152,164,205],"significantly":[145],"outperforms":[146],"baseline":[148],"5-gram":[149],"scores":[153],"54.99%":[154],"52.01%":[159],"restoration.":[161],"our":[165],"results":[166],"are":[167],"not":[168],"far":[169],"from":[170],"human":[171,178,188],"performance":[172],"same":[175],"task":[176],"with":[177],"-scores":[181],"range":[184,192],"81-87%":[186],"71-82%.":[194],"Finally,":[195],"manual":[197],"error":[198],"analysis":[199],"shows":[204],"close":[206],"40%":[208],"mismatched":[211],"do":[214],"fact":[216,222],"result":[217],"acceptable":[219],"choices,":[220],"obscured":[223],"automatic":[226],"string-matching":[227],"based":[228],"evaluation":[229],"scores.":[230]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
