{"id":"https://openalex.org/W3034231332","doi":"https://doi.org/10.1145/3394332.3402838","title":"Information Extraction from the Long Tail","display_name":"Information Extraction from the Long Tail","publication_year":2020,"publication_date":"2020-07-05","ids":{"openalex":"https://openalex.org/W3034231332","doi":"https://doi.org/10.1145/3394332.3402838","mag":"3034231332"},"language":"en","primary_location":{"id":"doi:10.1145/3394332.3402838","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3394332.3402838","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"12th ACM Conference on Web Science Companion","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://eprints.soton.ac.uk/441265/1/WebSci_2020_STAIDCC_middleton_accepted.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088719080","display_name":"Stuart E. Middleton","orcid":"https://orcid.org/0000-0001-8305-8176"},"institutions":[{"id":"https://openalex.org/I43439940","display_name":"University of Southampton","ror":"https://ror.org/01ryk1543","country_code":"GB","type":"education","lineage":["https://openalex.org/I43439940"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Stuart E. Middleton","raw_affiliation_strings":["University of Southampton, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Southampton, United Kingdom","institution_ids":["https://openalex.org/I43439940"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064504320","display_name":"Anita Lavorgna","orcid":"https://orcid.org/0000-0001-8484-1613"},"institutions":[{"id":"https://openalex.org/I43439940","display_name":"University of Southampton","ror":"https://ror.org/01ryk1543","country_code":"GB","type":"education","lineage":["https://openalex.org/I43439940"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Anita Lavorgna","raw_affiliation_strings":["University of Southampton, UK"],"affiliations":[{"raw_affiliation_string":"University of Southampton, UK","institution_ids":["https://openalex.org/I43439940"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040714545","display_name":"Geoff Neumann","orcid":null},"institutions":[{"id":"https://openalex.org/I43439940","display_name":"University of Southampton","ror":"https://ror.org/01ryk1543","country_code":"GB","type":"education","lineage":["https://openalex.org/I43439940"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Geoff Neumann","raw_affiliation_strings":["University of Southampton, UK"],"affiliations":[{"raw_affiliation_string":"University of Southampton, UK","institution_ids":["https://openalex.org/I43439940"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089744697","display_name":"David Whitehead","orcid":null},"institutions":[{"id":"https://openalex.org/I2799614521","display_name":"Royal Botanic Gardens, Kew","ror":"https://ror.org/00ynnr806","country_code":"GB","type":"archive","lineage":["https://openalex.org/I2799614521"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"David Whitehead","raw_affiliation_strings":["Royal Botanic Gardens Kew, UK"],"affiliations":[{"raw_affiliation_string":"Royal Botanic Gardens Kew, UK","institution_ids":["https://openalex.org/I2799614521"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5088719080"],"corresponding_institution_ids":["https://openalex.org/I43439940"],"apc_list":null,"apc_paid":null,"fwci":3.2775,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.9340077,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"82","last_page":"88"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10574","display_name":"Crime Patterns and Interventions","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10574","display_name":"Crime Patterns and Interventions","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12519","display_name":"Cybercrime and Law Enforcement Studies","score":0.9574999809265137,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13398","display_name":"Data Analysis with R","score":0.944599986076355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7371416687965393},{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.6535830497741699},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.6026009321212769},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.5754267573356628},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.504243016242981},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.5039154887199402},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.4659940004348755},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4657455384731293},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4219488501548767},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.25941962003707886}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7371416687965393},{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.6535830497741699},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.6026009321212769},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5754267573356628},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.504243016242981},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.5039154887199402},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.4659940004348755},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4657455384731293},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4219488501548767},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25941962003707886}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3394332.3402838","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3394332.3402838","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"12th ACM Conference on Web Science Companion","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.soton.ac.uk:441265","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.soton.ac.uk/441265/1/WebSci_2020_STAIDCC_middleton_accepted.pdf","source":{"id":"https://openalex.org/S4306401019","display_name":"ePrints Soton (University of Southampton)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I43439940","host_organization_name":"University of Southampton","host_organization_lineage":["https://openalex.org/I43439940"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},{"id":"pmh:oai:cris.unibo.it:11585/900838","is_oa":true,"landing_page_url":"https://hdl.handle.net/11585/900838","pdf_url":null,"source":{"id":"https://openalex.org/S4306402579","display_name":"Archivio istituzionale della ricerca (Alma Mater Studiorum Universit\u00e0 di Bologna)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210117483","host_organization_name":"Istituto di Ematologia di Bologna","host_organization_lineage":["https://openalex.org/I4210117483"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:eprints.soton.ac.uk:441265","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.soton.ac.uk/441265/1/WebSci_2020_STAIDCC_middleton_accepted.pdf","source":{"id":"https://openalex.org/S4306401019","display_name":"ePrints Soton (University of Southampton)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I43439940","host_organization_name":"University of Southampton","host_organization_lineage":["https://openalex.org/I43439940"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},"sustainable_development_goals":[{"display_name":"Life in Land","id":"https://metadata.un.org/sdg/15","score":0.6100000143051147}],"awards":[{"id":"https://openalex.org/G1219560291","display_name":null,"funder_award_id":"Economic","funder_id":"https://openalex.org/F4320334630","funder_display_name":"Economic and Social Research Council"},{"id":"https://openalex.org/G5907194098","display_name":"FloraGuard: ItTacking the illegal trade in endangered plants","funder_award_id":"ES/R003254/1","funder_id":"https://openalex.org/F4320334630","funder_display_name":"Economic and Social Research Council"},{"id":"https://openalex.org/G594587828","display_name":null,"funder_award_id":"ES/R003254/1","funder_id":"https://openalex.org/F4320334630","funder_display_name":"Economic and Social Research Council"}],"funders":[{"id":"https://openalex.org/F4320334630","display_name":"Economic and Social Research Council","ror":"https://ror.org/03n0ht308"},{"id":"https://openalex.org/F4320337719","display_name":"Defence and Security Accelerator","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3034231332.pdf","grobid_xml":"https://content.openalex.org/works/W3034231332.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W2014184612","https://openalex.org/W2024919586","https://openalex.org/W2040007391","https://openalex.org/W2162357792","https://openalex.org/W2250463012","https://openalex.org/W2289499055","https://openalex.org/W2465878642","https://openalex.org/W2763496252","https://openalex.org/W2790344751","https://openalex.org/W2805255244","https://openalex.org/W2887794975","https://openalex.org/W2910263779","https://openalex.org/W2946693633","https://openalex.org/W2964298336","https://openalex.org/W3011597628"],"related_works":["https://openalex.org/W4312773271","https://openalex.org/W4315588616","https://openalex.org/W2769501189","https://openalex.org/W2888805565","https://openalex.org/W2962686197","https://openalex.org/W2207653751","https://openalex.org/W3159709618","https://openalex.org/W2611137333","https://openalex.org/W3005513013","https://openalex.org/W2122605835"],"abstract_inverted_index":{"In":[0],"today's":[1],"online":[2,69,177],"forums":[3,70,178],"and":[4,27,40,53,97,108,144,153,179],"marketplaces":[5],"cybercrime":[6],"activity":[7],"can":[8,41],"often":[9],"be":[10],"found":[11],"lurking":[12],"in":[13,172],"plain":[14],"sight":[15],"behind":[16],"legitimate":[17],"posts.":[18],"Most":[19],"popular":[20],"criminology":[21,52,137],"techniques":[22],"are":[23],"either":[24],"manually":[25],"intensive,":[26],"so":[28],"do":[29],"not":[30],"scale":[31],"well,":[32],"or":[33],"focus":[34],"on":[35,133],"statistical":[36],"summaries":[37],"across":[38],"websites":[39],"miss":[42],"infrequent":[43],"behaviour":[44],"patterns.":[45],"We":[46,122],"present":[47],"an":[48,115],"inter-disciplinary":[49],"(computer":[50],"science,":[51],"conservation":[54,141],"science)":[55],"socio-technical":[56],"artificial":[57],"intelligence":[58,118],"(AI)":[59],"approach":[60,125],"to":[61,100],"information":[62,109],"extraction":[63,110],"from":[64],"the":[65,173],"long":[66,174],"tail":[67,175],"of":[68,75,85,176],"around":[71],"internet-facilitated":[72],"illegal":[73],"trades":[74],"endangered":[76,88],"species.":[77],"Our":[78],"methodology":[79],"is":[80,120],"highly":[81],"iterative,":[82],"taking":[83],"entities":[84,171],"interest":[86],"(e.g.":[87],"plant":[89],"species,":[90],"suspects,":[91],"locations)":[92],"identified":[93],"by":[94,140],"a":[95,134],"criminologist":[96],"using":[98,126],"them":[99],"direct":[101],"computer":[102],"science":[103,142],"tools":[104],"including":[105],"crawling,":[106],"searching":[107],"over":[111],"many":[112],"steps":[113],"until":[114],"acceptable":[116],"resulting":[117],"package":[119],"achieved.":[121],"evaluate":[123,145],"our":[124],"two":[127],"case":[128],"study":[129],"experiments,":[130],"each":[131],"based":[132],"one-week":[135],"duration":[136],"investigation":[138],"(aided":[139],"experts)":[143],"both":[146],"named":[147],"entity":[148],"(NE)":[149],"directed":[150,161],"graph":[151,162],"visualization":[152,163],"Latent":[154],"Dirichlet":[155],"Allocation":[156],"(LDA)":[157],"topic":[158,166],"modelling.":[159],"NE":[160],"consistently":[164],"outperforms":[165],"modelling":[167],"for":[168],"discovering":[169],"connected":[170],"marketplaces.":[180]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2020-06-19T00:00:00"}
