{"id":"https://openalex.org/W2120614745","doi":"https://doi.org/10.1142/s0219622009003399","title":"DISTRIBUTION OF MULTI-WORDS IN CHINESE AND ENGLISH DOCUMENTS","display_name":"DISTRIBUTION OF MULTI-WORDS IN CHINESE AND ENGLISH DOCUMENTS","publication_year":2009,"publication_date":"2009-06-01","ids":{"openalex":"https://openalex.org/W2120614745","doi":"https://doi.org/10.1142/s0219622009003399","mag":"2120614745"},"language":"en","primary_location":{"id":"doi:10.1142/s0219622009003399","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219622009003399","pdf_url":null,"source":{"id":"https://openalex.org/S207089700","display_name":"International Journal of Information Technology & Decision Making","issn_l":"0219-6220","issn":["0219-6220","1793-6845"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Information Technology &amp; Decision Making","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100444344","display_name":"Wen Zhang","orcid":"https://orcid.org/0000-0001-6513-5932"},"institutions":[{"id":"https://openalex.org/I177738480","display_name":"Japan Advanced Institute of Science and Technology","ror":"https://ror.org/03frj4r98","country_code":"JP","type":"education","lineage":["https://openalex.org/I177738480"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN","JP"],"is_corresponding":true,"raw_author_name":"WEN ZHANG","raw_affiliation_strings":["Laboratory of Internet Software Technologies, Institute of Software, Chinese Academy of Sciences, Beijing 100190, P. R. China","School of Knowledge Science, Japan Advanced Institute, of Science and Technology, 1-1 Asahidai, Tatsunokuchi, Ishikawa 923-1292, Japan"],"affiliations":[{"raw_affiliation_string":"Laboratory of Internet Software Technologies, Institute of Software, Chinese Academy of Sciences, Beijing 100190, P. R. China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Knowledge Science, Japan Advanced Institute, of Science and Technology, 1-1 Asahidai, Tatsunokuchi, Ishikawa 923-1292, Japan","institution_ids":["https://openalex.org/I177738480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071489199","display_name":"Taketoshi Yoshida","orcid":"https://orcid.org/0000-0002-9937-1703"},"institutions":[{"id":"https://openalex.org/I177738480","display_name":"Japan Advanced Institute of Science and Technology","ror":"https://ror.org/03frj4r98","country_code":"JP","type":"education","lineage":["https://openalex.org/I177738480"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"TAKETOSHI YOSHIDA","raw_affiliation_strings":["School of Knowledge Science, Japan Advanced Institute, of Science and Technology, 1-1 Asahidai, Tatsunokuchi, Ishikawa 923-1292, Japan","School of Knowledge Science, Japan Advanced Institute of Science and Technology 1-1, Asahidai, Tatsunokuchi, Ishikawa, 923-1292, Japan#TAB#"],"affiliations":[{"raw_affiliation_string":"School of Knowledge Science, Japan Advanced Institute, of Science and Technology, 1-1 Asahidai, Tatsunokuchi, Ishikawa 923-1292, Japan","institution_ids":["https://openalex.org/I177738480"]},{"raw_affiliation_string":"School of Knowledge Science, Japan Advanced Institute of Science and Technology 1-1, Asahidai, Tatsunokuchi, Ishikawa, 923-1292, Japan#TAB#","institution_ids":["https://openalex.org/I177738480"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110310046","display_name":"Xijin Tang","orcid":"https://orcid.org/0000-0002-2315-1755"},"institutions":[{"id":"https://openalex.org/I4210120485","display_name":"Academy of Mathematics and Systems Science","ror":"https://ror.org/02jkmyk67","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210120485"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"XIJIN TANG","raw_affiliation_strings":["Institute of Systems Science, Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Beijing 100190, P. R. China"],"affiliations":[{"raw_affiliation_string":"Institute of Systems Science, Academy of Mathematics and Systems Science, Chinese Academy of Sciences, Beijing 100190, P. R. China","institution_ids":["https://openalex.org/I4210120485","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100444344"],"corresponding_institution_ids":["https://openalex.org/I177738480","https://openalex.org/I19820366","https://openalex.org/I4210128818"],"apc_list":null,"apc_paid":null,"fwci":1.3549,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.85211879,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"08","issue":"02","first_page":"249","last_page":"265"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6482941508293152},{"id":"https://openalex.org/keywords/burstiness","display_name":"Burstiness","score":0.6081116199493408},{"id":"https://openalex.org/keywords/negative-binomial-distribution","display_name":"Negative binomial distribution","score":0.5771681666374207},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5451353192329407},{"id":"https://openalex.org/keywords/poisson-distribution","display_name":"Poisson distribution","score":0.5365635752677917},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.49318927526474},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47314178943634033},{"id":"https://openalex.org/keywords/collocation","display_name":"Collocation (remote sensing)","score":0.46294084191322327},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.32660022377967834},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.2543392777442932},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1196489930152893}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6482941508293152},{"id":"https://openalex.org/C2781023610","wikidata":"https://www.wikidata.org/wiki/Q17006304","display_name":"Burstiness","level":3,"score":0.6081116199493408},{"id":"https://openalex.org/C199335787","wikidata":"https://www.wikidata.org/wiki/Q743364","display_name":"Negative binomial distribution","level":3,"score":0.5771681666374207},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5451353192329407},{"id":"https://openalex.org/C100906024","wikidata":"https://www.wikidata.org/wiki/Q205692","display_name":"Poisson distribution","level":2,"score":0.5365635752677917},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.49318927526474},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47314178943634033},{"id":"https://openalex.org/C80023036","wikidata":"https://www.wikidata.org/wiki/Q5147531","display_name":"Collocation (remote sensing)","level":2,"score":0.46294084191322327},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32660022377967834},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2543392777442932},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1196489930152893},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C158379750","wikidata":"https://www.wikidata.org/wiki/Q214111","display_name":"Network packet","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1142/s0219622009003399","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219622009003399","pdf_url":null,"source":{"id":"https://openalex.org/S207089700","display_name":"International Journal of Information Technology & Decision Making","issn_l":"0219-6220","issn":["0219-6220","1793-6845"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Information Technology &amp; Decision Making","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.331.9716","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.331.9716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://meta-synthesis.iss.ac.cn/xjtang/paper/IJITDM0802_zhangwen.pdf","raw_type":"text"},{"id":"pmh:oai:RePEc:wsi:ijitdm:v:08:y:2009:i:02:n:s0219622009003399","is_oa":false,"landing_page_url":"http://www.worldscientific.com/doi/abs/10.1142/S0219622009003399","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:RePEc:wsi:ijitdm:v:08:y:2009:i:02:p:249-265","is_oa":false,"landing_page_url":"http://www.worldscinet.com/cgi-bin/details.cgi?type=html&amp;id=pii:S0219622009003399","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8299999833106995,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W6436850","https://openalex.org/W572705667","https://openalex.org/W1506595084","https://openalex.org/W1547032355","https://openalex.org/W1574901103","https://openalex.org/W1593045043","https://openalex.org/W1965086361","https://openalex.org/W1980300096","https://openalex.org/W1990194892","https://openalex.org/W2008434289","https://openalex.org/W2010010720","https://openalex.org/W2014491187","https://openalex.org/W2038141736","https://openalex.org/W2039741433","https://openalex.org/W2043914681","https://openalex.org/W2044070623","https://openalex.org/W2079656678","https://openalex.org/W2085314032","https://openalex.org/W2108903064","https://openalex.org/W2113110240","https://openalex.org/W2121182452","https://openalex.org/W2135631383","https://openalex.org/W2170610543","https://openalex.org/W2883779626","https://openalex.org/W4238346259","https://openalex.org/W4248343126","https://openalex.org/W6610335769"],"related_works":["https://openalex.org/W2362350383","https://openalex.org/W2995051414","https://openalex.org/W2047907216","https://openalex.org/W2129537379","https://openalex.org/W1553793542","https://openalex.org/W2036731318","https://openalex.org/W1991230619","https://openalex.org/W4311363496","https://openalex.org/W4319841362","https://openalex.org/W4232372158"],"abstract_inverted_index":{"As":[0],"a":[1,17,31,42,62,82,144],"hybrid":[2],"of":[3,22,35,64,67,115,117,132,137],"N-gram":[4],"in":[5,11,20,130,147],"natural":[6],"language":[7],"processing":[8],"and":[9,25,56,77,99,120],"collocation":[10],"statistical":[12],"linguistics,":[13],"multi-word":[14,84,133],"is":[15,37,87,124,143,152],"becoming":[16],"hot":[18],"topic":[19],"area":[21],"text":[23],"mining":[24],"information":[26],"retrieval.":[27],"In":[28,80],"this":[29,138],"paper,":[30],"study":[32,139],"concerning":[33],"distribution":[34,123,129],"multi-words":[36,76,91],"carried":[38],"out":[39],"to":[40,89,112,126],"explore":[41],"theoretical":[43],"basis":[44],"for":[45,73,156,162],"probabilistic":[46],"term-weighting":[47],"scheme.":[48],"Specifically,":[49],"the":[50,109,121],"Poisson":[51,122],"distribution,":[52,55],"zero-inflated":[53,127],"binomial":[54,128],"G-distribution":[57,107],"are":[58],"comparatively":[59],"studied":[60],"on":[61,95],"task":[63],"predicting":[65],"probabilities":[66,114],"multi-words'":[68,118],"occurrences":[69],"using":[70],"these":[71],"distributions,":[72],"both":[74],"technical":[75],"nontechnical":[78],"multi-words.":[79,163],"addition,":[81],"rule-based":[83],"extraction":[85],"algorithm":[86],"proposed":[88],"extract":[90],"from":[92],"texts":[93],"based":[94],"words'":[96],"occurring":[97],"patterns":[98],"syntactical":[100],"structures.":[101],"Our":[102],"experimental":[103],"results":[104],"demonstrate":[105],"that":[106,141],"has":[108],"best":[110],"capability":[111],"predict":[113],"frequency":[116],"occurrence":[119],"comparable":[125],"estimation":[131],"distribution.":[134],"The":[135],"outcome":[136],"validates":[140],"burstiness":[142],"universal":[145],"phenomenon":[146],"linguistic":[148],"count":[149],"data,":[150],"which":[151],"applicable":[153],"not":[154],"only":[155],"individual":[157],"content":[158],"words":[159],"but":[160],"also":[161]},"counts_by_year":[{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
