{"id":"https://openalex.org/W2554301102","doi":"https://doi.org/10.1109/bigdata.2016.7840981","title":"Exploring archives with probabilistic models: Topic modelling for the valorisation of digitised archives of the European Commission","display_name":"Exploring archives with probabilistic models: Topic modelling for the valorisation of digitised archives of the European Commission","publication_year":2016,"publication_date":"2016-12-01","ids":{"openalex":"https://openalex.org/W2554301102","doi":"https://doi.org/10.1109/bigdata.2016.7840981","mag":"2554301102"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2016.7840981","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7840981","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://biblio.ugent.be/publication/8520997/file/8521049.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006297499","display_name":"Simon Hengchen","orcid":"https://orcid.org/0000-0002-8453-7221"},"institutions":[{"id":"https://openalex.org/I132053463","display_name":"Universit\u00e9 Libre de Bruxelles","ror":"https://ror.org/01r9htc13","country_code":"BE","type":"education","lineage":["https://openalex.org/I132053463"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Simon Hengchen","raw_affiliation_strings":["Universit\u00e9 libre de Bruxelles, Brussels, Belgium"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 libre de Bruxelles, Brussels, Belgium","institution_ids":["https://openalex.org/I132053463"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002793148","display_name":"Mathias Coeckelbergs","orcid":null},"institutions":[{"id":"https://openalex.org/I132053463","display_name":"Universit\u00e9 Libre de Bruxelles","ror":"https://ror.org/01r9htc13","country_code":"BE","type":"education","lineage":["https://openalex.org/I132053463"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Mathias Coeckelbergs","raw_affiliation_strings":["Universit\u00e9 libre de Bruxelles, Brussels, Belgium"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 libre de Bruxelles, Brussels, Belgium","institution_ids":["https://openalex.org/I132053463"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085864454","display_name":"Seth van Hooland","orcid":null},"institutions":[{"id":"https://openalex.org/I132053463","display_name":"Universit\u00e9 Libre de Bruxelles","ror":"https://ror.org/01r9htc13","country_code":"BE","type":"education","lineage":["https://openalex.org/I132053463"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Seth van Hooland","raw_affiliation_strings":["Universit\u00e9 libre de Bruxelles, Brussels, Belgium"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 libre de Bruxelles, Brussels, Belgium","institution_ids":["https://openalex.org/I132053463"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055871472","display_name":"Ruben Verborgh","orcid":"https://orcid.org/0000-0002-8596-222X"},"institutions":[{"id":"https://openalex.org/I39327780","display_name":"iMinds","ror":"https://ror.org/03baec336","country_code":"BE","type":"nonprofit","lineage":["https://openalex.org/I39327780"]},{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Ruben Verborgh","raw_affiliation_strings":["Ghent University \u2013 iMinds, Ghent, Belgium"],"affiliations":[{"raw_affiliation_string":"Ghent University \u2013 iMinds, Ghent, Belgium","institution_ids":["https://openalex.org/I39327780","https://openalex.org/I32597200"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037564368","display_name":"Thomas Steiner","orcid":"https://orcid.org/0000-0001-7482-6129"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thomas Steiner","raw_affiliation_strings":["Google Germany, Germany"],"affiliations":[{"raw_affiliation_string":"Google Germany, Germany","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5006297499"],"corresponding_institution_ids":["https://openalex.org/I132053463"],"apc_list":null,"apc_paid":null,"fwci":12.4356,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.98130481,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"2","issue":null,"first_page":"3245","last_page":"3249"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8449174165725708},{"id":"https://openalex.org/keywords/european-commission","display_name":"European commission","score":0.7049956321716309},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6867622137069702},{"id":"https://openalex.org/keywords/european-union","display_name":"European union","score":0.60816890001297},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.5790798664093018},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.527228832244873},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5239443778991699},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5063012838363647},{"id":"https://openalex.org/keywords/commission","display_name":"Commission","score":0.5058245658874512},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.4413905739784241},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.424893856048584},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.15682357549667358},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.10518759489059448},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.1034955382347107}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8449174165725708},{"id":"https://openalex.org/C3020782553","wikidata":"https://www.wikidata.org/wiki/Q8880","display_name":"European commission","level":3,"score":0.7049956321716309},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6867622137069702},{"id":"https://openalex.org/C2910001868","wikidata":"https://www.wikidata.org/wiki/Q458","display_name":"European union","level":2,"score":0.60816890001297},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.5790798664093018},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.527228832244873},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5239443778991699},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5063012838363647},{"id":"https://openalex.org/C2776034101","wikidata":"https://www.wikidata.org/wiki/Q1509347","display_name":"Commission","level":2,"score":0.5058245658874512},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.4413905739784241},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.424893856048584},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.15682357549667358},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.10518759489059448},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.1034955382347107},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C105639569","wikidata":"https://www.wikidata.org/wiki/Q582577","display_name":"Economic policy","level":1,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/bigdata.2016.7840981","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7840981","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:archive.ugent.be:8520997","is_oa":true,"landing_page_url":"http://hdl.handle.net/1854/LU-8520997","pdf_url":"https://biblio.ugent.be/publication/8520997/file/8521049.pdf","source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISBN: 978-1-4673-9005-7","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:dipot.ulb.ac.be:2013/239623","is_oa":false,"landing_page_url":"https://dipot.ulb.ac.be/dspace/bitstream/2013/239623/3/Exploring_archives_probabilistic_models.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306401063","display_name":"D\u00e9p\u00f4t institutionnel de l'Universit\u00e9 libre de Bruxelles (Universit\u00e9 Libre de Bruxelles)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I132053463","host_organization_name":"Universit\u00e9 Libre de Bruxelles","host_organization_lineage":["https://openalex.org/I132053463"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"In: IEEE Proceedings of the First Workshop on Computational Archival Science. IEEE","raw_type":"info:ulb-repo/semantics/openurl/proceeding"}],"best_oa_location":{"id":"pmh:oai:archive.ugent.be:8520997","is_oa":true,"landing_page_url":"http://hdl.handle.net/1854/LU-8520997","pdf_url":"https://biblio.ugent.be/publication/8520997/file/8521049.pdf","source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISBN: 978-1-4673-9005-7","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4099999964237213,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2554301102.pdf","grobid_xml":"https://content.openalex.org/works/W2554301102.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W1557560513","https://openalex.org/W1604638557","https://openalex.org/W1673459928","https://openalex.org/W1880262756","https://openalex.org/W1920845339","https://openalex.org/W1982474113","https://openalex.org/W1984550621","https://openalex.org/W2032464653","https://openalex.org/W2106447713","https://openalex.org/W2107743791","https://openalex.org/W2159426623","https://openalex.org/W2169200297","https://openalex.org/W2217516311","https://openalex.org/W2226292825","https://openalex.org/W2250533720","https://openalex.org/W2621472371","https://openalex.org/W4231510805","https://openalex.org/W4233135949","https://openalex.org/W6639619044","https://openalex.org/W6640064808","https://openalex.org/W6683333316","https://openalex.org/W6684503276"],"related_works":["https://openalex.org/W2392768766","https://openalex.org/W2058118494","https://openalex.org/W2382021449","https://openalex.org/W2095118173","https://openalex.org/W2104269053","https://openalex.org/W2106424170","https://openalex.org/W1985426483","https://openalex.org/W2501188010","https://openalex.org/W4299935056","https://openalex.org/W1971020767"],"abstract_inverted_index":{"Topic":[0],"Modelling":[1],"(TM)":[2],"has":[3],"gained":[4],"momentum":[5],"over":[6],"the":[7,12,30,44,59,64,68,73,100,113,135,140,143,152,157,161],"last":[8],"few":[9],"years":[10],"within":[11],"humanities":[13],"to":[14,61,82,119,137,155],"analyze":[15],"topics":[16,136,159],"represented":[17],"in":[18,109,148],"large":[19,37,126],"volumes":[20],"of":[21,32,39,43,50,67,72,85,102,117,128,139,145,163],"full":[22],"text.":[23],"This":[24],"paper":[25,150],"proposes":[26],"an":[27,110],"experiment":[28],"with":[29,160],"usage":[31],"TM":[33,118],"based":[34],"on":[35,92],"a":[36,83,93,125,164],"subset":[38],"digitized":[40],"archival":[41,104],"holdings":[42],"European":[45,74],"Commission":[46],"(EC).":[47],"Currently,":[48],"millions":[49],"scanned":[51],"and":[52,57,70,95,115],"OCRed":[53],"files":[54],"are":[55,90],"available":[56,91],"hold":[58],"potential":[60],"significantly":[62],"change":[63],"way":[65],"historians":[66],"construction":[69],"evolution":[71],"Union":[75],"can":[76],"perform":[77],"their":[78],"research.":[79],"However,":[80],"due":[81],"lack":[84],"resources,":[86],"only":[87],"minimal":[88],"metadata":[89],"file":[94],"document":[96],"level,":[97],"seriously":[98],"undermining":[99],"accessibility":[101],"this":[103,149],"collection.":[105],"The":[106],"article":[107],"explores":[108],"empirical":[111],"manner":[112],"possibilities":[114],"limits":[116],"automatically":[120],"extract":[121],"key":[122],"concepts":[123],"from":[124],"body":[127],"documents":[129],"spanning":[130],"multiple":[131],"decades.":[132],"By":[133],"mapping":[134],"headings":[138],"EUROVOC":[141],"thesaurus,":[142],"proof":[144],"concept":[146],"described":[147],"offers":[151],"future":[153],"possibility":[154],"represent":[156],"identified":[158],"help":[162],"hierarchical":[165],"search":[166],"interface":[167],"for":[168],"end-users.":[169]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
