{"id":"https://openalex.org/W2330916053","doi":"https://doi.org/10.18293/dms2015-024","title":"A Distributed Framework for NLP-Based Keyword and Keyphrase Extraction From Web Pages and Documents","display_name":"A Distributed Framework for NLP-Based Keyword and Keyphrase Extraction From Web Pages and Documents","publication_year":2015,"publication_date":"2015-09-01","ids":{"openalex":"https://openalex.org/W2330916053","doi":"https://doi.org/10.18293/dms2015-024","mag":"2330916053"},"language":"en","primary_location":{"id":"doi:10.18293/dms2015-024","is_oa":false,"landing_page_url":"https://doi.org/10.18293/dms2015-024","pdf_url":null,"source":{"id":"https://openalex.org/S4386872205","display_name":"Proceedings","issn_l":"2326-3261","issn":["2326-3261"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Conferences on Distributed Multimedia Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075359388","display_name":"Paolo Nesi","orcid":"https://orcid.org/0000-0003-1044-3107"},"institutions":[{"id":"https://openalex.org/I45084792","display_name":"University of Florence","ror":"https://ror.org/04jr1s763","country_code":"IT","type":"education","lineage":["https://openalex.org/I45084792"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Paolo Nesi","raw_affiliation_strings":["Department of Information Engineering (DINFO), University of Florence -Firenze, Italy","Distributed Systems and Internet Technology Lab, DISIT Lab, http://www.disit.dinfo.unifi.it"],"affiliations":[{"raw_affiliation_string":"Department of Information Engineering (DINFO), University of Florence -Firenze, Italy","institution_ids":["https://openalex.org/I45084792"]},{"raw_affiliation_string":"Distributed Systems and Internet Technology Lab, DISIT Lab, http://www.disit.dinfo.unifi.it","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085546799","display_name":"Gianni Pantaleo","orcid":"https://orcid.org/0000-0002-9235-437X"},"institutions":[{"id":"https://openalex.org/I45084792","display_name":"University of Florence","ror":"https://ror.org/04jr1s763","country_code":"IT","type":"education","lineage":["https://openalex.org/I45084792"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Gianni Pantaleo","raw_affiliation_strings":["Department of Information Engineering (DINFO), University of Florence -Firenze, Italy","Distributed Systems and Internet Technology Lab, DISIT Lab, http://www.disit.dinfo.unifi.it"],"affiliations":[{"raw_affiliation_string":"Department of Information Engineering (DINFO), University of Florence -Firenze, Italy","institution_ids":["https://openalex.org/I45084792"]},{"raw_affiliation_string":"Distributed Systems and Internet Technology Lab, DISIT Lab, http://www.disit.dinfo.unifi.it","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011725691","display_name":"Gianmarco Sanesi","orcid":null},"institutions":[{"id":"https://openalex.org/I45084792","display_name":"University of Florence","ror":"https://ror.org/04jr1s763","country_code":"IT","type":"education","lineage":["https://openalex.org/I45084792"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Gianmarco Sanesi","raw_affiliation_strings":["Department of Information Engineering (DINFO), University of Florence -Firenze, Italy","Distributed Systems and Internet Technology Lab, DISIT Lab, http://www.disit.dinfo.unifi.it"],"affiliations":[{"raw_affiliation_string":"Department of Information Engineering (DINFO), University of Florence -Firenze, Italy","institution_ids":["https://openalex.org/I45084792"]},{"raw_affiliation_string":"Distributed Systems and Internet Technology Lab, DISIT Lab, http://www.disit.dinfo.unifi.it","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5085546799"],"corresponding_institution_ids":["https://openalex.org/I45084792"],"apc_list":null,"apc_paid":null,"fwci":1.392,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.81939537,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"2015","issue":null,"first_page":"155","last_page":"161"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9453999996185303,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9179999828338623,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8667408227920532},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5336238741874695},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5078702569007874},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4634328782558441},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3295520544052124}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8667408227920532},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5336238741874695},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5078702569007874},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4634328782558441},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3295520544052124}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18293/dms2015-024","is_oa":false,"landing_page_url":"https://doi.org/10.18293/dms2015-024","pdf_url":null,"source":{"id":"https://openalex.org/S4386872205","display_name":"Proceedings","issn_l":"2326-3261","issn":["2326-3261"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Conferences on Distributed Multimedia Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:flore.unifi.it:2158/1021483","is_oa":false,"landing_page_url":"http://hdl.handle.net/2158/1021483","pdf_url":null,"source":{"id":"https://openalex.org/S4306402033","display_name":"Florence Research (University of Florence)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45084792","host_organization_name":"University of Florence","host_organization_lineage":["https://openalex.org/I45084792"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.41999998688697815,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W85238235","https://openalex.org/W1603470798","https://openalex.org/W1798501848","https://openalex.org/W1985554463","https://openalex.org/W2007848212","https://openalex.org/W2028859627","https://openalex.org/W2029367202","https://openalex.org/W2030903088","https://openalex.org/W2045181608","https://openalex.org/W2060772621","https://openalex.org/W2064418625","https://openalex.org/W2083094972","https://openalex.org/W2101114896","https://openalex.org/W2103099759","https://openalex.org/W2117523781","https://openalex.org/W2126399065","https://openalex.org/W2131975293","https://openalex.org/W2133149548","https://openalex.org/W2141222516","https://openalex.org/W2145049651","https://openalex.org/W2146769536","https://openalex.org/W2163659824","https://openalex.org/W2165200277","https://openalex.org/W2167329753","https://openalex.org/W2265611049","https://openalex.org/W2292981475","https://openalex.org/W2623325486","https://openalex.org/W4297805475","https://openalex.org/W4300988640"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2130043461","https://openalex.org/W2530322880","https://openalex.org/W3192589309"],"abstract_inverted_index":{"The":[0,165],"recent":[1],"growth":[2],"of":[3,16,26,46,70,144,175],"the":[4,14,38,43,142,145,158],"World":[5],"Wide":[6],"Web":[7],"at":[8],"increasing":[9],"rate":[10],"and":[11,13,31,49,65,86,90,102,116,178],"speed":[12],"number":[15],"online":[17],"available":[18],"resources":[19],"populating":[20],"Internet":[21],"represent":[22],"a":[23,124,133,153,172],"massive":[24],"source":[25,149,160],"knowledge":[27,35],"for":[28,37,80,127],"various":[29],"research":[30],"business":[32],"interests.":[33],"Such":[34],"is,":[36],"most":[39],"part,":[40],"embedded":[41],"in":[42,100,132,152],"textual":[44],"content":[45],"web":[47,176],"pages":[48,177],"documents,":[50],"which":[51],"is":[52],"largely":[53],"represented":[54],"as":[55,113],"unstructured":[56],"natural":[57],"language":[58],"formats.":[59],"In":[60],"order":[61],"to":[62,77],"automatically":[63],"ingest":[64],"process":[66],"such":[67,112],"huge":[68],"amounts":[69],"data,":[71],"single-machine,":[72],"non-distributed":[73],"architectures":[74],"are":[75,98],"proving":[76],"be":[78],"inefficient":[79],"tasks":[81,131],"like":[82],"Big":[83],"Data":[84],"mining":[85],"intensive":[87],"text":[88],"processing":[89],"analysis.":[91],"Current":[92],"Natural":[93],"Language":[94],"Processing":[95],"(NLP)":[96],"systems":[97],"growing":[99],"complexity,":[101],"computational":[103],"power":[104],"needs":[105],"have":[106],"been":[107,138,169],"significantly":[108],"increased,":[109],"requiring":[110],"solutions":[111],"distributed":[114,125],"frameworks":[115],"parallel":[117,134],"computing":[118],"programming":[119],"paradigms.":[120],"This":[121,136],"paper":[122],"presents":[123],"framework":[126,167],"executing":[128],"NLP":[129,150],"related":[130],"environment.":[135],"has":[137,168],"achieved":[139],"by":[140],"integrating":[141],"APIs":[143],"widespread":[146],"GATE":[147],"open":[148,159],"platform":[151],"multi-node":[154],"cluster,":[155],"built":[156],"upon":[157],"Apache":[161],"Hadoop":[162],"file":[163],"system.":[164],"proposed":[166],"evaluated":[170],"against":[171],"real":[173],"corpus":[174],"documents.":[179]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
