{"id":"https://openalex.org/W2266240920","doi":"https://doi.org/10.1145/2838931.2838940","title":"Text segmentation and Chinese site search","display_name":"Text segmentation and Chinese site search","publication_year":2015,"publication_date":"2015-11-23","ids":{"openalex":"https://openalex.org/W2266240920","doi":"https://doi.org/10.1145/2838931.2838940","mag":"2266240920"},"language":"en","primary_location":{"id":"doi:10.1145/2838931.2838940","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2838931.2838940","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th Australasian Document Computing Symposium","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101622399","display_name":"Liyuan Zhou","orcid":"https://orcid.org/0000-0001-9046-6098"},"institutions":[{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Liyuan Zhou","raw_affiliation_strings":["NICTA &amp; ANU"],"affiliations":[{"raw_affiliation_string":"NICTA &amp; ANU","institution_ids":["https://openalex.org/I42894916"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032211209","display_name":"David Hawking","orcid":"https://orcid.org/0000-0002-3704-5398"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"David Hawking","raw_affiliation_strings":["Microsoft &amp; ANU"],"affiliations":[{"raw_affiliation_string":"Microsoft &amp; ANU","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046377370","display_name":"Paul Thomas","orcid":"https://orcid.org/0000-0003-2425-3136"},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"funder","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Paul Thomas","raw_affiliation_strings":["CSIRO &amp; ANU"],"affiliations":[{"raw_affiliation_string":"CSIRO &amp; ANU","institution_ids":["https://openalex.org/I1292875679"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101622399"],"corresponding_institution_ids":["https://openalex.org/I42894916"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09180686,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bigram","display_name":"Bigram","score":0.9410723447799683},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6960374116897583},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6714421510696411},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6590317487716675},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6505199670791626},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6161909103393555},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.6149247884750366},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5554088354110718},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.520438015460968},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.43654417991638184},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.17808681726455688},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.0727044939994812}],"concepts":[{"id":"https://openalex.org/C108757681","wikidata":"https://www.wikidata.org/wiki/Q2773912","display_name":"Bigram","level":3,"score":0.9410723447799683},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6960374116897583},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6714421510696411},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6590317487716675},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6505199670791626},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6161909103393555},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.6149247884750366},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5554088354110718},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.520438015460968},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.43654417991638184},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.17808681726455688},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0727044939994812},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2838931.2838940","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2838931.2838940","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th Australasian Document Computing Symposium","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.727.6455","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.727.6455","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://david-hawking.net/pubs/cn-segmentation.pdf","raw_type":"text"},{"id":"pmh:oai:openresearch-repository.anu.edu.au:1885/103834","is_oa":false,"landing_page_url":"http://hdl.handle.net/1885/103834","pdf_url":null,"source":{"id":"https://openalex.org/S4306402539","display_name":"ANU Open Research (Australian National University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I118347636","host_organization_name":"Australian National University","host_organization_lineage":["https://openalex.org/I118347636"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Text segmentation and Chinese site search","raw_type":"Conference paper"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7900000214576721,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W114039280","https://openalex.org/W187510616","https://openalex.org/W1218353304","https://openalex.org/W1508113757","https://openalex.org/W1573198627","https://openalex.org/W1585524612","https://openalex.org/W1612847231","https://openalex.org/W1989983673","https://openalex.org/W2051885765","https://openalex.org/W2056469463","https://openalex.org/W2060234482","https://openalex.org/W2100259670","https://openalex.org/W2133968343","https://openalex.org/W2156037541","https://openalex.org/W2198380824","https://openalex.org/W2347599227","https://openalex.org/W2405675882","https://openalex.org/W3142894668","https://openalex.org/W6607538162","https://openalex.org/W6634412145"],"related_works":["https://openalex.org/W2197825247","https://openalex.org/W2126405470","https://openalex.org/W4388551919","https://openalex.org/W2296205523","https://openalex.org/W111205082","https://openalex.org/W2053243610","https://openalex.org/W2251446894","https://openalex.org/W1831215082","https://openalex.org/W2136165248","https://openalex.org/W2978383222"],"abstract_inverted_index":{"Automatic":[0],"segmentation":[1],"and":[2,32,51],"overlapping":[3],"bigrams":[4],"are":[5],"the":[6,12,43,56],"most":[7],"common":[8],"methods":[9],"for":[10],"overcoming":[11],"lack":[13],"of":[14,42,61],"explicit":[15],"word":[16],"boundaries":[17],"in":[18],"Chinese":[19,68],"text.":[20],"Past":[21],"studies":[22],"have":[23,29],"compared":[24],"their":[25],"effectiveness,":[26],"but":[27],"findings":[28],"been":[30,36],"equivocal":[31],"site":[33],"search":[34],"has":[35],"little":[37],"studied.":[38],"We":[39],"compare":[40],"representatives":[41],"two":[44],"approaches":[45],"using":[46],"a":[47],"465,000":[48],"page":[49],"crawl":[50],"test":[52],"queries":[53],"applicable":[54],"to":[55],"university":[57],"context.":[58],"503":[59],"pairs":[60],"result":[62],"sets":[63],"were":[64],"judged":[65],"by":[66],"56":[67],"students.":[69]},"counts_by_year":[],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
