{"id":"https://openalex.org/W2910408265","doi":"https://doi.org/10.1109/aiccsa.2018.8612867","title":"A Cleaning Algorithm for Noiseless Opinion Mining Corpus Construction","display_name":"A Cleaning Algorithm for Noiseless Opinion Mining Corpus Construction","publication_year":2018,"publication_date":"2018-10-01","ids":{"openalex":"https://openalex.org/W2910408265","doi":"https://doi.org/10.1109/aiccsa.2018.8612867","mag":"2910408265"},"language":"en","primary_location":{"id":"doi:10.1109/aiccsa.2018.8612867","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aiccsa.2018.8612867","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/ACS 15th International Conference on Computer Systems and Applications (AICCSA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007805677","display_name":"Otman Manad","orcid":null},"institutions":[{"id":"https://openalex.org/I48825208","display_name":"Universit\u00e9 Paris 8","ror":"https://ror.org/04wez5e68","country_code":"FR","type":"education","lineage":["https://openalex.org/I48825208"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Otman Manad","raw_affiliation_strings":["LIASD Laboratory University of of Paris8, Saint-Denis, France","Umanis (7 Rue Paul Vaillant Couturier, 92300 Levallois-Perret - France)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIASD Laboratory University of of Paris8, Saint-Denis, France","institution_ids":["https://openalex.org/I48825208"]},{"raw_affiliation_string":"Umanis (7 Rue Paul Vaillant Couturier, 92300 Levallois-Perret - France)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102997620","display_name":"Anna Pappa","orcid":"https://orcid.org/0000-0003-2447-4078"},"institutions":[{"id":"https://openalex.org/I48825208","display_name":"Universit\u00e9 Paris 8","ror":"https://ror.org/04wez5e68","country_code":"FR","type":"education","lineage":["https://openalex.org/I48825208"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Anna Pappa","raw_affiliation_strings":["LIASD Laboratory University of of Paris8, Saint-Denis, France","LIASD - Laboratoire d'Informatique Avanc\u00e9e de Saint-Denis (Universit\u00e9 Paris 8 Vincennes-Saint-Denis - 2 rue de la Libert\u00e9 - 93526 Saint-Denis cedex - France)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIASD Laboratory University of of Paris8, Saint-Denis, France","institution_ids":["https://openalex.org/I48825208"]},{"raw_affiliation_string":"LIASD - Laboratoire d'Informatique Avanc\u00e9e de Saint-Denis (Universit\u00e9 Paris 8 Vincennes-Saint-Denis - 2 rue de la Libert\u00e9 - 93526 Saint-Denis cedex - France)","institution_ids":["https://openalex.org/I48825208"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049573335","display_name":"Gilles Bernard","orcid":null},"institutions":[{"id":"https://openalex.org/I48825208","display_name":"Universit\u00e9 Paris 8","ror":"https://ror.org/04wez5e68","country_code":"FR","type":"education","lineage":["https://openalex.org/I48825208"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Gilles Bernard","raw_affiliation_strings":["LIASD Laboratory University of of Paris8, Saint-Denis, France","LIASD - Laboratoire d'Informatique Avanc\u00e9e de Saint-Denis (Universit\u00e9 Paris 8 Vincennes-Saint-Denis - 2 rue de la Libert\u00e9 - 93526 Saint-Denis cedex - France)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIASD Laboratory University of of Paris8, Saint-Denis, France","institution_ids":["https://openalex.org/I48825208"]},{"raw_affiliation_string":"LIASD - Laboratoire d'Informatique Avanc\u00e9e de Saint-Denis (Universit\u00e9 Paris 8 Vincennes-Saint-Denis - 2 rue de la Libert\u00e9 - 93526 Saint-Denis cedex - France)","institution_ids":["https://openalex.org/I48825208"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8226,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.83000776,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7678210735321045},{"id":"https://openalex.org/keywords/sentiment-analysis","display_name":"Sentiment analysis","score":0.6575813293457031},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.53582763671875},{"id":"https://openalex.org/keywords/extractor","display_name":"Extractor","score":0.5339822769165039},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4773333668708801},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.46947941184043884},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.430603951215744},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38466089963912964},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.35948142409324646},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.34615594148635864},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.16858196258544922},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09427455067634583}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7678210735321045},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.6575813293457031},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.53582763671875},{"id":"https://openalex.org/C117978034","wikidata":"https://www.wikidata.org/wiki/Q5422192","display_name":"Extractor","level":2,"score":0.5339822769165039},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4773333668708801},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.46947941184043884},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.430603951215744},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38466089963912964},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35948142409324646},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34615594148635864},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.16858196258544922},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09427455067634583},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/aiccsa.2018.8612867","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aiccsa.2018.8612867","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/ACS 15th International Conference on Computer Systems and Applications (AICCSA)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-03625535v1","is_oa":false,"landing_page_url":"https://hal.science/hal-03625535","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2018 IEEE/ACS 15th International Conference on Computer Systems and Applications (AICCSA), Oct 2018, Aqaba, Jordan. pp.1-7, &#x27E8;10.1109/AICCSA.2018.8612867&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W46452414","https://openalex.org/W81717766","https://openalex.org/W84079877","https://openalex.org/W256861921","https://openalex.org/W1492472380","https://openalex.org/W1647671624","https://openalex.org/W1976216148","https://openalex.org/W1999740339","https://openalex.org/W2004918398","https://openalex.org/W2034190452","https://openalex.org/W2048071570","https://openalex.org/W2066792529","https://openalex.org/W2075350106","https://openalex.org/W2104086170","https://openalex.org/W2120101509","https://openalex.org/W2148317291","https://openalex.org/W2160654919","https://openalex.org/W2282641603","https://openalex.org/W2344771039","https://openalex.org/W2611014295","https://openalex.org/W2611147438","https://openalex.org/W2914186633","https://openalex.org/W4285719527","https://openalex.org/W6601892241","https://openalex.org/W6603240035","https://openalex.org/W6603373351","https://openalex.org/W6609691951","https://openalex.org/W6675573929","https://openalex.org/W6683687199","https://openalex.org/W6695579111","https://openalex.org/W6736735720","https://openalex.org/W6737234038","https://openalex.org/W6759325025","https://openalex.org/W6890451465","https://openalex.org/W6929990038"],"related_works":["https://openalex.org/W3089396779","https://openalex.org/W2548633793","https://openalex.org/W3013279174","https://openalex.org/W2941935829","https://openalex.org/W2596247554","https://openalex.org/W4301373556","https://openalex.org/W2095030957","https://openalex.org/W2066827917","https://openalex.org/W2884201223","https://openalex.org/W1941834444"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"DyCorC,":[3],"an":[4],"extractor":[5],"and":[6,23,48,59,64,78,93,154],"cleaner":[7],"of":[8,28,76,87,91,110,130,145],"web":[9],"forums":[10],"contents.":[11],"Its":[12,134],"main":[13],"points":[14],"are":[15,69],"that":[16],"the":[17,52,74,111,151,160],"process":[18],"is":[19,33,97,106,136],"entirely":[20],"automatic,":[21],"language-independent":[22],"adaptable":[24],"to":[25,36,98,108],"all":[26],"kinds":[27],"forum":[29],"architectures.":[30],"The":[31],"corpus":[32,122],"built":[34],"accordingly":[35],"user":[37],"queries":[38],"using":[39],"expressions":[40],"or":[41],"item":[42],"keywords":[43],"as":[44,159],"in":[45],"research":[46],"engines,":[47],"then":[49],"DyCorC":[50,125],"minimizes":[51],"boilerplate":[53],"for":[54,84],"further":[55],"feature-based":[56],"opinion":[57],"mining":[58],"sentiment":[60],"analysis,":[61],"gathering":[62],"comments":[63],"scorings.":[65],"Such":[66],"noiseless":[67,131],"corpora":[68],"usually":[70],"hand":[71],"made":[72],"with":[73,80,118,141],"help":[75],"crawlers":[77],"scrapers,":[79],"specific":[81],"containers":[82],"devised":[83],"each":[85],"type":[86],"forum,":[88],"entailing":[89],"lots":[90],"work":[92],"skills.":[94],"Our":[95,104],"aim":[96],"cut":[99],"down":[100],"this":[101],"preprocessing":[102],"stage.":[103],"algorithm":[105,135],"compared":[107,149],"state":[109],"art":[112],"models":[113],"(Apache":[114],"Nutch,":[115],"BootCat,":[116],"JusText),":[117],"a":[119,127],"gold":[120],"standard":[121],"we":[123],"released.":[124],"offers":[126],"better":[128],"quality":[129],"content":[132],"extraction.":[133],"based":[137],"on":[138,150],"DOM":[139],"trees":[140],"string":[142],"distances,":[143],"seven":[144],"which":[146],"have":[147],"been":[148,157],"reference":[152],"corpus,":[153],"feature-distance":[155],"has":[156],"chosen":[158],"best":[161],"fit.":[162]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
