{"id":"https://openalex.org/W2053618623","doi":"https://doi.org/10.1109/cit.2004.1357323","title":"Notice of Violation of IEEE Publication Principles: An efficient method of eliminating noisy information in web pages for data mining","display_name":"Notice of Violation of IEEE Publication Principles: An efficient method of eliminating noisy information in web pages for data mining","publication_year":2004,"publication_date":"2004-01-01","ids":{"openalex":"https://openalex.org/W2053618623","doi":"https://doi.org/10.1109/cit.2004.1357323","mag":"2053618623"},"language":"en","primary_location":{"id":"doi:10.1109/cit.2004.1357323","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cit.2004.1357323","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Fourth International Conference onComputer and Information Technology, 2004. CIT '04.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007688382","display_name":"Amiya Kumar Tripathy","orcid":"https://orcid.org/0000-0002-0688-4092"},"institutions":[{"id":"https://openalex.org/I152869788","display_name":"Motilal Nehru National Institute of Technology","ror":"https://ror.org/04dp7tp96","country_code":"IN","type":"education","lineage":["https://openalex.org/I152869788"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"A.K. Tripathy","raw_affiliation_strings":["Fourth International Conference on Computer and Information Technology, Wuhan, China","M N Nat. Inst. of Technol., Allahabad, India"],"affiliations":[{"raw_affiliation_string":"Fourth International Conference on Computer and Information Technology, Wuhan, China","institution_ids":[]},{"raw_affiliation_string":"M N Nat. Inst. of Technol., Allahabad, India","institution_ids":["https://openalex.org/I152869788"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100754863","display_name":"Arun Kumar Singh","orcid":"https://orcid.org/0000-0001-7317-7032"},"institutions":[{"id":"https://openalex.org/I152869788","display_name":"Motilal Nehru National Institute of Technology","ror":"https://ror.org/04dp7tp96","country_code":"IN","type":"education","lineage":["https://openalex.org/I152869788"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"A.K. Singh","raw_affiliation_strings":["Fourth International Conference on Computer and Information Technology, Wuhan, China","M N Nat. Inst. of Technol., Allahabad, India"],"affiliations":[{"raw_affiliation_string":"Fourth International Conference on Computer and Information Technology, Wuhan, China","institution_ids":[]},{"raw_affiliation_string":"M N Nat. Inst. of Technol., Allahabad, India","institution_ids":["https://openalex.org/I152869788"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5007688382"],"corresponding_institution_ids":["https://openalex.org/I152869788"],"apc_list":null,"apc_paid":null,"fwci":0.7891,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.84803978,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"978","last_page":"985"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8185667991638184},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.7769708633422852},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.595284640789032},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5797587633132935},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5635011196136475},{"id":"https://openalex.org/keywords/web-mining","display_name":"Web mining","score":0.5597265362739563},{"id":"https://openalex.org/keywords/notice","display_name":"Notice","score":0.5379481911659241},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5149141550064087},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4824029207229614},{"id":"https://openalex.org/keywords/document-object-model","display_name":"Document Object Model","score":0.44261792302131653},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.43195295333862305},{"id":"https://openalex.org/keywords/web-site","display_name":"Web site","score":0.4205433428287506},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3793736696243286},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.3107362985610962},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.2556005120277405},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20089766383171082}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8185667991638184},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.7769708633422852},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.595284640789032},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5797587633132935},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5635011196136475},{"id":"https://openalex.org/C197046077","wikidata":"https://www.wikidata.org/wiki/Q785337","display_name":"Web mining","level":3,"score":0.5597265362739563},{"id":"https://openalex.org/C2779913896","wikidata":"https://www.wikidata.org/wiki/Q7063001","display_name":"Notice","level":2,"score":0.5379481911659241},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5149141550064087},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4824029207229614},{"id":"https://openalex.org/C137922610","wikidata":"https://www.wikidata.org/wiki/Q2093","display_name":"Document Object Model","level":3,"score":0.44261792302131653},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.43195295333862305},{"id":"https://openalex.org/C2984519610","wikidata":"https://www.wikidata.org/wiki/Q35127","display_name":"Web site","level":3,"score":0.4205433428287506},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3793736696243286},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.3107362985610962},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.2556005120277405},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20089766383171082},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cit.2004.1357323","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cit.2004.1357323","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Fourth International Conference onComputer and Information Technology, 2004. CIT '04.","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.6100000143051147}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1506845741","https://openalex.org/W1550206324","https://openalex.org/W1996869586","https://openalex.org/W2005677600","https://openalex.org/W2019264297","https://openalex.org/W2040075907","https://openalex.org/W2068863058","https://openalex.org/W2085989833","https://openalex.org/W2117148965","https://openalex.org/W2118587067","https://openalex.org/W2120804083","https://openalex.org/W2124436456","https://openalex.org/W2129851526","https://openalex.org/W2138621811","https://openalex.org/W2143956100","https://openalex.org/W2146990843","https://openalex.org/W2149636015","https://openalex.org/W2156632103","https://openalex.org/W2951911250","https://openalex.org/W4244354591","https://openalex.org/W6632865047","https://openalex.org/W7001238702"],"related_works":["https://openalex.org/W2384888906","https://openalex.org/W2144190808","https://openalex.org/W2101955803","https://openalex.org/W2376314740","https://openalex.org/W2366644548","https://openalex.org/W2357241418","https://openalex.org/W2119214692","https://openalex.org/W2115485936","https://openalex.org/W1513692756","https://openalex.org/W2119135658"],"abstract_inverted_index":{"Notice":[0],"of":[1,3,9,44,49,67,87,97,130,172,230,257,287,299,329,341,357],"Violation":[2],"IEEE":[4],"Publication":[5,69],"Principles<br><br>\"An":[6],"Efficient":[7],"Method":[8],"Eliminating":[10,252],"Noisy":[11,163],"Information":[12,31,164],"in":[13,65,165,242,290,353],"Web":[14,166,192,249,293,355,378],"Pages":[15,167],"for":[16,168,308],"Data":[17,169,181],"Mining,\"<br>":[18,170],"Tripathy,":[19],"A.":[20,23],"K.;":[21],"Singh,":[22],"K.<br>The":[24],"Fourth":[25],"International":[26,176],"Conference":[27,177],"on":[28],"Computer":[29],"and":[30,41,47,79,123,146,160,214,217,283,334,350],"Technology":[32],"(CIT'04),<br>September":[33],"14-16,":[34],"2004,":[35],"Wuhan,":[36],"China,":[37],"pp.":[38],"978-985.<br><br>After":[39],"careful":[40],"considered":[42],"review":[43],"the":[45,85,88,98,102,117,128,153,173,201,226,231,233,239,279,284,288,297,300,309,314,330,338,342,358,365],"content":[46,203,228],"authorship":[48],"this":[50,58,80,131,144,261,362],"paper":[51,59,93,103,121],"by":[52,360,372],"a":[53,95,265,271,291,302,373],"duly":[54],"constituted":[55],"expert":[56],"committee,":[57],"has":[60,75,207],"been":[61],"found":[62],"to":[63,116,127,138,143,152,277,325,348,364],"be":[64,136,150,306],"violation":[66,81],"IEEE?s":[68],"Principles.<br><br>The":[70],"first":[71],"author":[72,90],"(A.K.":[73,91],"Tripathy)":[74],"taken":[76],"full":[77],"responsibility":[78],"was":[82,109],"done":[83],"without":[84,111,124],"knowledge":[86],"second":[89],"Singh).<br><br>This":[92],"is":[94,255,346,370,377],"duplication":[96],"original":[99,107,118],"text":[100,108],"from":[101,200],"cited":[104],"below.":[105],"The":[106,344,367],"copied":[110],"attribution":[112],"(including":[113],"appropriate":[114],"references":[115,142,148],"author(s)":[119],"and/or":[120],"title)":[122],"permission.":[125],"Due":[126],"nature":[129],"violation,":[132],"reasonable":[133],"effort":[134],"should":[135,149],"made":[137,151],"remove":[139],"all":[140],"past":[141],"paper,":[145],"future":[147],"following":[154],"article:<br><br>":[155],"Lan":[156],"Yi,":[157],"Bing":[158],"Liu,":[159],"Xiaoli":[161],"Li.<br>\"Eliminating":[162],"Proceedings":[171],"ACM":[174],"SIGKDD":[175],"on<br>Knowledge":[178],"Discovery":[179],"&":[180],"Mining":[182],"(KDD-2003),<br>Washington,":[183],"DC,":[184],"USA,":[185],"August":[186],"24-27,":[187],"2003.<br><br>":[188],"<br/>":[189],"A":[190],"commercial":[191],"page":[193,232,356,363],"typically":[194],"contains":[195],"many":[196],"information":[197,240],"blocks.":[198,235],"Apart":[199],"main":[202,227,339],"blocks,":[204],"it":[205],"usually":[206],"such":[208],"blocks":[209,222,229,245],"as":[210],"navigation":[211],"panels,":[212],"copyright":[213],"privacy":[215],"notices,":[216],"advertisements.":[218],"We":[219,236,269,319],"call":[220,313],"these":[221,243,253],"that":[223,238,376],"are":[224],"not":[225],"noisy":[234,244],"show":[237],"contained":[241],"can":[246,305],"seriously":[247],"harm":[248],"data":[250],"mining.":[251],"noises":[254,333,352],"thus":[256],"great":[258],"importance.":[259],"In":[260],"work,":[262],"we":[263,312],"propose":[264,270],"noise":[266],"elimination":[267],"technique.":[268],"tree":[272,304,317],"structure,":[273],"called":[274],"pattern":[275,303,316],"tree,":[276],"capture":[278],"common":[280],"presentation":[281],"styles":[282],"actual":[285],"contents":[286,340],"pages":[289,298],"given":[292],"site.":[294,343],"By":[295],"sampling":[296],"site,":[301,310],"built":[307],"which":[311,327,335],"site":[315,359],"(SPT).":[318],"then":[320],"introduce":[321],"an":[322],"information-based":[323],"measure":[324],"determine":[326],"parts":[328,336],"SPT":[331,345],"represent":[332,337],"employed":[347],"detect":[349],"eliminate":[351],"any":[354],"mapping":[361],"SPT.":[366],"proposed":[368],"technique":[369],"evaluated":[371],"data-mining":[374],"task":[375],"clustering.":[379]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
