{"id":"https://openalex.org/W2803771812","doi":"https://doi.org/10.1145/3195727","title":"Finding Parallel Passages in Cultural Heritage Archives","display_name":"Finding Parallel Passages in Cultural Heritage Archives","publication_year":2018,"publication_date":"2018-08-22","ids":{"openalex":"https://openalex.org/W2803771812","doi":"https://doi.org/10.1145/3195727","mag":"2803771812"},"language":"en","primary_location":{"id":"doi:10.1145/3195727","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3195727","pdf_url":null,"source":{"id":"https://openalex.org/S4210184050","display_name":"Journal on Computing and Cultural Heritage","issn_l":"1556-4673","issn":["1556-4673","1556-4711"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal on Computing and Cultural Heritage","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014086872","display_name":"Martyn Harris","orcid":"https://orcid.org/0000-0003-4851-4679"},"institutions":[{"id":"https://openalex.org/I98259816","display_name":"Birkbeck, University of London","ror":"https://ror.org/02mb95055","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I98259816"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Martyn Harris","raw_affiliation_strings":["Birkbeck, University of London, Malet Street, London, UK"],"raw_orcid":"https://orcid.org/0000-0003-4851-4679","affiliations":[{"raw_affiliation_string":"Birkbeck, University of London, Malet Street, London, UK","institution_ids":["https://openalex.org/I98259816"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015725705","display_name":"Mark Levene","orcid":"https://orcid.org/0000-0001-8632-4732"},"institutions":[{"id":"https://openalex.org/I98259816","display_name":"Birkbeck, University of London","ror":"https://ror.org/02mb95055","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I98259816"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Mark Levene","raw_affiliation_strings":["Birkbeck, University of London, Malet Street, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Birkbeck, University of London, Malet Street, London, UK","institution_ids":["https://openalex.org/I98259816"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015784640","display_name":"Dell Zhang","orcid":"https://orcid.org/0000-0002-8774-3725"},"institutions":[{"id":"https://openalex.org/I98259816","display_name":"Birkbeck, University of London","ror":"https://ror.org/02mb95055","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I98259816"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Dell Zhang","raw_affiliation_strings":["Birkbeck, University of London, Malet Street, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Birkbeck, University of London, Malet Street, London, UK","institution_ids":["https://openalex.org/I98259816"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111310912","display_name":"Dan Levene","orcid":null},"institutions":[{"id":"https://openalex.org/I43439940","display_name":"University of Southampton","ror":"https://ror.org/01ryk1543","country_code":"GB","type":"education","lineage":["https://openalex.org/I43439940"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Dan Levene","raw_affiliation_strings":["Southampton University, Southampton, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southampton University, Southampton, UK","institution_ids":["https://openalex.org/I43439940"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8448,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.80057295,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"11","issue":"3","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8229490518569946},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.7279354333877563},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.6083570718765259},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.5792914032936096},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5340081453323364},{"id":"https://openalex.org/keywords/cultural-heritage","display_name":"Cultural heritage","score":0.5204172730445862},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.4732171595096588},{"id":"https://openalex.org/keywords/digital-library","display_name":"Digital library","score":0.46879634261131287},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4650973081588745},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44299861788749695},{"id":"https://openalex.org/keywords/hebrew","display_name":"Hebrew","score":0.4333399534225464},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.41604459285736084},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.35612452030181885},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3366272449493408},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.30566757917404175},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.186039000749588}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8229490518569946},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.7279354333877563},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6083570718765259},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.5792914032936096},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5340081453323364},{"id":"https://openalex.org/C60671577","wikidata":"https://www.wikidata.org/wiki/Q210272","display_name":"Cultural heritage","level":2,"score":0.5204172730445862},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.4732171595096588},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.46879634261131287},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4650973081588745},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44299861788749695},{"id":"https://openalex.org/C91304198","wikidata":"https://www.wikidata.org/wiki/Q9288","display_name":"Hebrew","level":2,"score":0.4333399534225464},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.41604459285736084},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.35612452030181885},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3366272449493408},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.30566757917404175},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.186039000749588},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3195727","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3195727","pdf_url":null,"source":{"id":"https://openalex.org/S4210184050","display_name":"Journal on Computing and Cultural Heritage","issn_l":"1556-4673","issn":["1556-4673","1556-4711"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal on Computing and Cultural Heritage","raw_type":"journal-article"},{"id":"pmh:oai:eprints.bbk.ac.uk.oai2:21385","is_oa":false,"landing_page_url":"https://eprints.bbk.ac.uk/id/eprint/21385/6/21385a.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400466","display_name":"BIROn (Birkbeck, University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I98259816","host_organization_name":"Birkbeck, University of London","host_organization_lineage":["https://openalex.org/I98259816"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},{"id":"pmh:oai:eprints.soton.ac.uk:426911","is_oa":false,"landing_page_url":"https://eprints.soton.ac.uk/426911/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401019","display_name":"ePrints Soton (University of Southampton)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I43439940","host_organization_name":"University of Southampton","host_organization_lineage":["https://openalex.org/I43439940"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.550000011920929,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320311904","display_name":"Wellcome Trust","ror":"https://ror.org/029chgv08"},{"id":"https://openalex.org/F4320318751","display_name":"International Seafood Sustainability Foundation","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":72,"referenced_works":["https://openalex.org/W173640133","https://openalex.org/W234245528","https://openalex.org/W659351324","https://openalex.org/W938539187","https://openalex.org/W1029854643","https://openalex.org/W1497309193","https://openalex.org/W1497983610","https://openalex.org/W1503398984","https://openalex.org/W1524281572","https://openalex.org/W1532325895","https://openalex.org/W1538303725","https://openalex.org/W1600537614","https://openalex.org/W1602103716","https://openalex.org/W1607035479","https://openalex.org/W1647671624","https://openalex.org/W1938755728","https://openalex.org/W1972594981","https://openalex.org/W1974360117","https://openalex.org/W1974568263","https://openalex.org/W1981745473","https://openalex.org/W1990061958","https://openalex.org/W1990190154","https://openalex.org/W1995713768","https://openalex.org/W2000246295","https://openalex.org/W2004737172","https://openalex.org/W2005892921","https://openalex.org/W2027447543","https://openalex.org/W2027752285","https://openalex.org/W2029097226","https://openalex.org/W2029203225","https://openalex.org/W2045812729","https://openalex.org/W2058896506","https://openalex.org/W2069870183","https://openalex.org/W2097879961","https://openalex.org/W2100506586","https://openalex.org/W2108872112","https://openalex.org/W2116316001","https://openalex.org/W2124807415","https://openalex.org/W2125359479","https://openalex.org/W2129444086","https://openalex.org/W2136542423","https://openalex.org/W2151401338","https://openalex.org/W2152263452","https://openalex.org/W2158195707","https://openalex.org/W2161563551","https://openalex.org/W2163361328","https://openalex.org/W2163382007","https://openalex.org/W2168859760","https://openalex.org/W2170240176","https://openalex.org/W2217516311","https://openalex.org/W2241862190","https://openalex.org/W2293201672","https://openalex.org/W2306414308","https://openalex.org/W2321436353","https://openalex.org/W2326757031","https://openalex.org/W2398269865","https://openalex.org/W2403480486","https://openalex.org/W2419539795","https://openalex.org/W2489953154","https://openalex.org/W2494469814","https://openalex.org/W2568598316","https://openalex.org/W2581610544","https://openalex.org/W2950133940","https://openalex.org/W3004423609","https://openalex.org/W3044876782","https://openalex.org/W3049711360","https://openalex.org/W3128806671","https://openalex.org/W4213009331","https://openalex.org/W4252733585","https://openalex.org/W4285719527","https://openalex.org/W4298872162","https://openalex.org/W4310711452"],"related_works":["https://openalex.org/W3032998312","https://openalex.org/W1503094549","https://openalex.org/W4384486036","https://openalex.org/W135177976","https://openalex.org/W2337920774","https://openalex.org/W4318823662","https://openalex.org/W2886410948","https://openalex.org/W2025875869","https://openalex.org/W3207526114","https://openalex.org/W2800001041"],"abstract_inverted_index":{"It":[0,93],"is":[1,94,149,181],"of":[2,28,84,98,134,153,176],"great":[3],"interest":[4],"to":[5,19,51,122,141],"researchers":[6],"and":[7,82,114,137,145,163],"scholars":[8],"in":[9,34,69,90,110],"many":[10],"disciplines":[11],"(particularly":[12],"those":[13],"working":[14],"on":[15,125,182],"cultural":[16],"heritage":[17],"projects)":[18],"study":[20],"parallel":[21,85],"passages":[22,86],"(i.e.,":[23],"identical":[24],"or":[25,58],"similar":[26],"pieces":[27],"text":[29,36,128],"describing":[30],"the":[31,56,80,96,174,185],"same":[32],"thing)":[33],"digital":[35,75,147],"archives.":[37],"Although":[38],"there":[39],"exist":[40],"a":[41,52,59,74,132,143,150,154],"few":[42],"software":[43],"tools":[44],"for":[45,87],"this":[46,65],"purpose,":[47],"they":[48],"are":[49],"restricted":[50],"specific":[53,60],"domain":[54,89],"(e.g.,":[55,62],"Bible)":[57],"language":[61,158],"Hebrew).":[63],"In":[64],"article,":[66],"we":[67,72],"present":[68],"detail":[70],"how":[71],"build":[73],"infrastructure":[76,148],"that":[77,130,173],"can":[78],"facilitate":[79],"search":[81,179],"discovery":[83],"any":[88,91],"language.":[92],"at":[95],"core":[97],"our":[99,177],"Samtla":[100],"(Search":[101],"And":[102],"Mining":[103],"Tools":[104],"with":[105,112,184],"Linguistic":[106],"Analysis)":[107],"system":[108,117],"designed":[109],"collaboration":[111],"historians":[113],"linguists.":[115],"The":[116,139],"has":[118],"already":[119],"been":[120],"used":[121],"support":[123],"research":[124],"five":[126],"large":[127],"corpora":[129],"span":[131],"number":[133],"different":[135],"domains":[136],"languages.":[138],"key":[140],"such":[142],"domain-independent":[144],"language-independent":[146],"novel":[151],"combination":[152],"character-based":[155],"n":[156],"-gram":[157],"model,":[159],"space-optimized":[160],"suffix":[161],"tree,":[162],"generalized":[164],"edit":[165],"distance.":[166],"A":[167],"comprehensive":[168],"evaluation":[169],"through":[170],"crowdsourcing":[171],"shows":[172],"effectiveness":[175],"system\u2019s":[178],"functionality":[180],"par":[183],"human-level":[186],"performance.":[187]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
