{"id":"https://openalex.org/W2197164549","doi":"https://doi.org/10.1145/2736277.2741628","title":"N-gram IDF","display_name":"N-gram IDF","publication_year":2015,"publication_date":"2015-05-18","ids":{"openalex":"https://openalex.org/W2197164549","doi":"https://doi.org/10.1145/2736277.2741628","mag":"2197164549"},"language":"en","primary_location":{"id":"doi:10.1145/2736277.2741628","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2736277.2741628","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th International Conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010966769","display_name":"Masumi Shirakawa","orcid":null},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"Osaka University","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Masumi Shirakawa","raw_affiliation_strings":["Osaka University, Osaka, Japan"],"affiliations":[{"raw_affiliation_string":"Osaka University, Osaka, Japan","institution_ids":["https://openalex.org/I98285908"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012025870","display_name":"Takahiro Hara","orcid":"https://orcid.org/0000-0003-4807-3156"},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"Osaka University","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takahiro Hara","raw_affiliation_strings":["Osaka University, Osaka, Japan"],"affiliations":[{"raw_affiliation_string":"Osaka University, Osaka, Japan","institution_ids":["https://openalex.org/I98285908"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063345953","display_name":"Shojiro Nishio","orcid":null},"institutions":[{"id":"https://openalex.org/I98285908","display_name":"Osaka University","ror":"https://ror.org/035t8zc32","country_code":"JP","type":"education","lineage":["https://openalex.org/I98285908"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shojiro Nishio","raw_affiliation_strings":["Osaka University, Osaka, Japan"],"affiliations":[{"raw_affiliation_string":"Osaka University, Osaka, Japan","institution_ids":["https://openalex.org/I98285908"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5010966769"],"corresponding_institution_ids":["https://openalex.org/I98285908"],"apc_list":null,"apc_paid":null,"fwci":4.3144,"has_fulltext":false,"cited_by_count":35,"citation_normalized_percentile":{"value":0.94799028,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":93,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"960","last_page":"970"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.8571233749389648},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.7352757453918457},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6525366306304932},{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.5944809317588806},{"id":"https://openalex.org/keywords/edit-distance","display_name":"Edit distance","score":0.5389182567596436},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.4966002106666565},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4418529272079468},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38425713777542114},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3262343406677246},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3206416070461273},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.09534472227096558}],"concepts":[{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.8571233749389648},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.7352757453918457},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6525366306304932},{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.5944809317588806},{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.5389182567596436},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.4966002106666565},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4418529272079468},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38425713777542114},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3262343406677246},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3206416070461273},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.09534472227096558},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2736277.2741628","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2736277.2741628","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th International Conference on World Wide Web","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W68251067","https://openalex.org/W109020964","https://openalex.org/W153318553","https://openalex.org/W1482214997","https://openalex.org/W1499624045","https://openalex.org/W1532325895","https://openalex.org/W1546703457","https://openalex.org/W1561328404","https://openalex.org/W1593045043","https://openalex.org/W1638203394","https://openalex.org/W1815301076","https://openalex.org/W1956559956","https://openalex.org/W1965667542","https://openalex.org/W1977766834","https://openalex.org/W1981038351","https://openalex.org/W1981159181","https://openalex.org/W1982442952","https://openalex.org/W2008434289","https://openalex.org/W2018140810","https://openalex.org/W2024932032","https://openalex.org/W2026655145","https://openalex.org/W2030453903","https://openalex.org/W2030839740","https://openalex.org/W2067970404","https://openalex.org/W2068967633","https://openalex.org/W2080068076","https://openalex.org/W2096468639","https://openalex.org/W2101746535","https://openalex.org/W2103196492","https://openalex.org/W2120429080","https://openalex.org/W2128859735","https://openalex.org/W2131846894","https://openalex.org/W2134283755","https://openalex.org/W2134696992","https://openalex.org/W2140321362","https://openalex.org/W2144211451","https://openalex.org/W2151453116","https://openalex.org/W2154527162","https://openalex.org/W2158903965","https://openalex.org/W2160517426","https://openalex.org/W2165236847","https://openalex.org/W2165612380","https://openalex.org/W2165897980","https://openalex.org/W2295278941","https://openalex.org/W2325227998","https://openalex.org/W2402917303","https://openalex.org/W2612649659","https://openalex.org/W2772164361","https://openalex.org/W2803437449","https://openalex.org/W2930957955","https://openalex.org/W4213138647","https://openalex.org/W4230960895","https://openalex.org/W4233798822","https://openalex.org/W4238346259","https://openalex.org/W4255005259","https://openalex.org/W6628905179","https://openalex.org/W6640862754","https://openalex.org/W6655297870","https://openalex.org/W6679642144"],"related_works":["https://openalex.org/W2136519138","https://openalex.org/W1989944203","https://openalex.org/W1998479724","https://openalex.org/W3034305000","https://openalex.org/W2546839375","https://openalex.org/W3082647531","https://openalex.org/W2999485669","https://openalex.org/W2963185483","https://openalex.org/W2141389247","https://openalex.org/W3127593639"],"abstract_inverted_index":{"This":[0],"paper":[1],"first":[2],"reveals":[3],"the":[4,33,41,44,47,51,58,67,127,189],"relationship":[5],"between":[6,43],"Inverse":[7],"Document":[8],"Frequency":[9],"(IDF),":[10],"a":[11,19,29,36,78],"global":[12],"term":[13,37,45,158],"weighting":[14],"scheme,":[15],"and":[16,46,66,86,110,147,160,165,184],"information":[17,54],"distance,":[18],"universal":[20],"metric":[21],"defined":[22],"by":[23],"Kolmogorov":[24,59],"complexity.":[25],"We":[26,153],"concretely":[27],"give":[28],"theoretical":[30,79],"explanation":[31],"that":[32,167,175],"IDF":[34,82,100,169],"of":[35,53,81,88,96,114,191],"is":[38,61],"equal":[39],"to":[40,103],"distance":[42,55],"empty":[48],"string":[49,136],"in":[50,56],"space":[52],"which":[57],"complexity":[60],"approximated":[62],"using":[63,120,143,150,181],"Web":[64,161],"documents":[65],"Shannon-Fano":[68],"coding.":[69],"Based":[70],"on":[71,156],"our":[72],"findings,":[73],"we":[74,133],"propose":[75],"N-gram":[76,99,168,192],"IDF,":[77],"extension":[80],"for":[83,129,178],"handling":[84],"words":[85],"phrases":[87],"any":[89,97,115,121],"length.":[90],"By":[91],"comparing":[92],"weights":[93],"among":[94,107],"N-grams":[95,106],"N,":[98],"enables":[101],"us":[102],"determine":[104],"dominant":[105],"overlapping":[108],"ones":[109],"extract":[111],"key":[112,157],"terms":[113],"length":[116],"from":[117],"texts":[118],"without":[119],"NLP":[122],"techniques.":[123],"To":[124],"efficiently":[125],"compute":[126],"weight":[128],"all":[130],"possible":[131],"N-grams,":[132],"adopt":[134],"two":[135],"processing":[137],"techniques,":[138],"i.e.,":[139],"maximal":[140],"substring":[141],"extraction":[142,159],"enhanced":[144],"suffix":[145],"array":[146],"document":[148],"listing":[149],"wavelet":[151],"tree.":[152],"conducted":[154],"experiments":[155],"search":[162],"query":[163],"segmentation,":[164],"found":[166],"was":[170],"competitive":[171],"with":[172],"state-of-the-art":[173],"methods":[174],"were":[176],"designed":[177],"each":[179],"application":[180],"additional":[182],"resources":[183],"efforts.":[185],"The":[186],"results":[187],"exemplified":[188],"potential":[190],"IDF.":[193]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
