{"id":"https://openalex.org/W2110546295","doi":"https://doi.org/10.1109/itcc.2003.1197520","title":"Phrase-based text representation for managing the Web documents","display_name":"Phrase-based text representation for managing the Web documents","publication_year":2004,"publication_date":"2004-05-13","ids":{"openalex":"https://openalex.org/W2110546295","doi":"https://doi.org/10.1109/itcc.2003.1197520","mag":"2110546295"},"language":"en","primary_location":{"id":"doi:10.1109/itcc.2003.1197520","is_oa":false,"landing_page_url":"https://doi.org/10.1109/itcc.2003.1197520","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings ITCC 2003. International Conference on Information Technology: Coding and Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062974371","display_name":"R. Sharma","orcid":"https://orcid.org/0000-0003-1728-8912"},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"R. Sharma","raw_affiliation_strings":["Department of Computer Science and Engineering, Indian Institute of Technology, Chennai, India","Dept of Computer Science and Engg, Indian Institute of Technology Madras, Chennai, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Indian Institute of Technology, Chennai, India","institution_ids":["https://openalex.org/I24676775"]},{"raw_affiliation_string":"Dept of Computer Science and Engg, Indian Institute of Technology Madras, Chennai, India","institution_ids":["https://openalex.org/I24676775"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070142969","display_name":"Shanmuganathan Raman","orcid":"https://orcid.org/0000-0003-2718-7891"},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"S. Raman","raw_affiliation_strings":["Department of Computer Science and Engineering, Indian Institute of Technology, Chennai, India","Dept of Computer Science and Engg, Indian Institute of Technology Madras, Chennai, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Indian Institute of Technology, Chennai, India","institution_ids":["https://openalex.org/I24676775"]},{"raw_affiliation_string":"Dept of Computer Science and Engg, Indian Institute of Technology Madras, Chennai, India","institution_ids":["https://openalex.org/I24676775"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5062974371"],"corresponding_institution_ids":["https://openalex.org/I24676775"],"apc_list":null,"apc_paid":null,"fwci":1.7988,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.87588516,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"165","last_page":"169"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10679","display_name":"Service-Oriented Architecture and Web Services","score":0.979200005531311,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8862794041633606},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6747578382492065},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6732794046401978},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.6300109624862671},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.5874494910240173},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.5816646814346313},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.5712208151817322},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5529555678367615},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5525493621826172},{"id":"https://openalex.org/keywords/keyword-extraction","display_name":"Keyword extraction","score":0.48604780435562134},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4824815094470978},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4810890853404999},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4713651239871979},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4636344611644745},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4514622986316681},{"id":"https://openalex.org/keywords/html-element","display_name":"HTML element","score":0.43094396591186523},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.27200847864151},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2302311658859253},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11508595943450928}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8862794041633606},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6747578382492065},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6732794046401978},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.6300109624862671},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.5874494910240173},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.5816646814346313},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5712208151817322},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5529555678367615},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5525493621826172},{"id":"https://openalex.org/C2780288562","wikidata":"https://www.wikidata.org/wiki/Q25053353","display_name":"Keyword extraction","level":2,"score":0.48604780435562134},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4824815094470978},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4810890853404999},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4713651239871979},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4636344611644745},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4514622986316681},{"id":"https://openalex.org/C81639021","wikidata":"https://www.wikidata.org/wiki/Q179551","display_name":"HTML element","level":3,"score":0.43094396591186523},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.27200847864151},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2302311658859253},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11508595943450928},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/itcc.2003.1197520","is_oa":false,"landing_page_url":"https://doi.org/10.1109/itcc.2003.1197520","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings ITCC 2003. International Conference on Information Technology: Coding and Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.4399999976158142}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1520232900","https://openalex.org/W1549026077","https://openalex.org/W1594962278","https://openalex.org/W1994300392","https://openalex.org/W1997841190","https://openalex.org/W2002306339","https://openalex.org/W2125473504","https://openalex.org/W4239696231","https://openalex.org/W4285719527","https://openalex.org/W6631205191"],"related_works":["https://openalex.org/W2355481812","https://openalex.org/W3014300186","https://openalex.org/W2165504147","https://openalex.org/W2154146989","https://openalex.org/W2065605022","https://openalex.org/W2352911637","https://openalex.org/W4283768689","https://openalex.org/W2066380224","https://openalex.org/W1788528807","https://openalex.org/W2367158473"],"abstract_inverted_index":{"The":[0],"World":[1],"Wide":[2],"Web":[3],"has":[4],"provided":[5],"the":[6,12,20,24,32,44,60,78,81,88,123,131],"facility":[7],"of":[8,14,19,34,57,68,80,87,108,118,130],"bringing":[9],"information":[10,36],"to":[11,38,144],"fingertips":[13],"its":[15,75],"users.":[16],"Since":[17],"most":[18],"documents":[21,112],"available":[22],"on":[23,115],"web":[25],"are":[26,52,66],"machine-readable":[27],"but":[28],"not":[29],"machine-understandable,":[30],"ensuring":[31],"retrieval":[33,140],"relevant":[35],"continues":[37],"be":[39,145],"a":[40,94,116],"difficult":[41],"task.":[42],"In":[43],"traditional":[45],"text":[46,96,111],"representation":[47,97],"approach,":[48],"high":[49],"frequency":[50,73],"keywords":[51],"used":[53],"as":[54],"term":[55],"representatives":[56],"text.":[58],"However,":[59],"main":[61],"drawbacks":[62],"in":[63,133,136,139],"this":[64,91],"approach":[65,98],"lack":[67],"direct":[69],"relationship":[70],"between":[71],"word":[72,82],"and":[74,77],"importance,":[76],"effect":[79],"ambiguities.":[83],"Considering":[84],"these":[85],"shortcomings":[86],"keyword-based":[89],"method,":[90],"paper":[92],"presents":[93],"phrase-based":[95],"that":[99],"uses":[100],"rule-based":[101],"natural":[102],"language":[103],"processing":[104],"(NLP)":[105],"techniques.":[106],"Extraction":[107],"key-phrases":[109],"from":[110],"is":[113,142],"based":[114],"process":[117],"partial":[119],"parsing.":[120],"By":[121],"making":[122],"indexing":[124],"terms":[125],"more":[126],"meaningful":[127],"through":[128],"reduction":[129],"ambiguity":[132],"words":[134],"considered":[135],"isolation,":[137],"improvement":[138],"effectiveness":[141],"sought":[143],"achieved.":[146]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
