{"id":"https://openalex.org/W2798978891","doi":"https://doi.org/10.1145/3209978.3210015","title":"A Dataset and an Examination of Identifying Passages for Due Diligence","display_name":"A Dataset and an Examination of Identifying Passages for Due Diligence","publication_year":2018,"publication_date":"2018-06-27","ids":{"openalex":"https://openalex.org/W2798978891","doi":"https://doi.org/10.1145/3209978.3210015","mag":"2798978891"},"language":"en","primary_location":{"id":"doi:10.1145/3209978.3210015","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3209978.3210015","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 41st International ACM SIGIR Conference on Research &amp; Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048499958","display_name":"Adam Roegiest","orcid":"https://orcid.org/0000-0003-1265-8881"},"institutions":[{"id":"https://openalex.org/I4210127351","display_name":"Cisco Systems (Canada)","ror":"https://ror.org/02af0qw97","country_code":"CA","type":"company","lineage":["https://openalex.org/I135428043","https://openalex.org/I4210127351"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Adam Roegiest","raw_affiliation_strings":["Kira Systems, Toronto, ON, Canada"],"affiliations":[{"raw_affiliation_string":"Kira Systems, Toronto, ON, Canada","institution_ids":["https://openalex.org/I4210127351"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012818010","display_name":"Alexander K. Hudek","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127351","display_name":"Cisco Systems (Canada)","ror":"https://ror.org/02af0qw97","country_code":"CA","type":"company","lineage":["https://openalex.org/I135428043","https://openalex.org/I4210127351"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Alexander K. Hudek","raw_affiliation_strings":["Kira Systems, Toronto, ON, Canada"],"affiliations":[{"raw_affiliation_string":"Kira Systems, Toronto, ON, Canada","institution_ids":["https://openalex.org/I4210127351"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029852612","display_name":"Anne McNulty","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127351","display_name":"Cisco Systems (Canada)","ror":"https://ror.org/02af0qw97","country_code":"CA","type":"company","lineage":["https://openalex.org/I135428043","https://openalex.org/I4210127351"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Anne McNulty","raw_affiliation_strings":["Kira Systems, Toronto, ON, Canada"],"affiliations":[{"raw_affiliation_string":"Kira Systems, Toronto, ON, Canada","institution_ids":["https://openalex.org/I4210127351"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5048499958"],"corresponding_institution_ids":["https://openalex.org/I4210127351"],"apc_list":null,"apc_paid":null,"fwci":4.0377,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.94495973,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"465","last_page":"474"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9750000238418579,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9746999740600586,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7521592378616333},{"id":"https://openalex.org/keywords/sequence-labeling","display_name":"Sequence labeling","score":0.7330039143562317},{"id":"https://openalex.org/keywords/conditional-random-field","display_name":"Conditional random field","score":0.6422528028488159},{"id":"https://openalex.org/keywords/due-diligence","display_name":"Due diligence","score":0.6396342515945435},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6110426783561707},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5258777141571045},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5242053270339966},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5195479989051819},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5041199922561646},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4768872559070587},{"id":"https://openalex.org/keywords/diligence","display_name":"Diligence","score":0.4753453731536865},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.4375557005405426},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.43553292751312256},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.43159985542297363},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3816331624984741},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.32735806703567505}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7521592378616333},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.7330039143562317},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.6422528028488159},{"id":"https://openalex.org/C2781016034","wikidata":"https://www.wikidata.org/wiki/Q794134","display_name":"Due diligence","level":2,"score":0.6396342515945435},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6110426783561707},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5258777141571045},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5242053270339966},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5195479989051819},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5041199922561646},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4768872559070587},{"id":"https://openalex.org/C2779820079","wikidata":"https://www.wikidata.org/wiki/Q1427962","display_name":"Diligence","level":2,"score":0.4753453731536865},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.4375557005405426},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.43553292751312256},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.43159985542297363},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3816331624984741},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.32735806703567505},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3209978.3210015","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3209978.3210015","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 41st International ACM SIGIR Conference on Research &amp; Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1480224957","https://openalex.org/W1964076997","https://openalex.org/W1990697041","https://openalex.org/W2029075138","https://openalex.org/W2039190550","https://openalex.org/W2041277986","https://openalex.org/W2051669046","https://openalex.org/W2077428231","https://openalex.org/W2094144755","https://openalex.org/W2096765155","https://openalex.org/W2114006712","https://openalex.org/W2121764873","https://openalex.org/W2123545015","https://openalex.org/W2126105412","https://openalex.org/W2139686264","https://openalex.org/W2147880316","https://openalex.org/W2160218441","https://openalex.org/W2295030615","https://openalex.org/W2296283641","https://openalex.org/W2419329171","https://openalex.org/W2429914308","https://openalex.org/W2576830931","https://openalex.org/W2577511878","https://openalex.org/W2736077928","https://openalex.org/W2742436952","https://openalex.org/W2772524198","https://openalex.org/W2781361141","https://openalex.org/W2789669965","https://openalex.org/W2950133940","https://openalex.org/W2963940534"],"related_works":["https://openalex.org/W2798423868","https://openalex.org/W2962906565","https://openalex.org/W3027026357","https://openalex.org/W3121861178","https://openalex.org/W2921033060","https://openalex.org/W2721902206","https://openalex.org/W2061027419","https://openalex.org/W2076440176","https://openalex.org/W54817952","https://openalex.org/W2109665237"],"abstract_inverted_index":{"We":[0],"present":[1,78],"and":[2,34,60,64,110,117,127],"formalize":[3],"the":[4,32,41,85,88,94,146],"due":[5,42,95],"diligence":[6,43,96],"problem,":[7],"where":[8],"lawyers":[9],"extract":[10],"data":[11],"from":[12],"legal":[13],"documents":[14,59,105],"to":[15,83,144],"assess":[16],"risk":[17],"in":[18,49,93,135],"a":[19,37,66],"potential":[20],"merger":[21],"or":[22],"acquisition,":[23],"as":[24,106],"an":[25,138],"information":[26,91],"retrieval":[27],"task.":[28],"Furthermore,":[29],"we":[30,77,80,100,142],"describe":[31],"creation":[33],"annotation":[35],"of":[36,68,87,108,140],"document":[38],"collection":[39],"for":[40,90,122],"problem":[44],"that":[45,102],"will":[46],"foster":[47],"research":[48],"this":[50,75,136],"area.":[51],"This":[52],"dataset":[53],"comprises":[54],"50":[55],"topics":[56],"over":[57],"4,412":[58],"~15":[61],"million":[62],"sentences":[63],"is":[65,137,152],"subset":[67],"our":[69],"own":[70],"internal":[71],"training":[72],"data.":[73],"Using":[74],"dataset,":[76],"what":[79,141],"have":[81],"found":[82],"be":[84,145],"state":[86],"art":[89],"extraction":[92,151],"problem.":[97],"In":[98],"particular,":[99],"find":[101],"when":[103,150],"treating":[104],"sequences":[107],"labelled":[109],"unlabelled":[111],"sentences,":[112],"Conditional":[113],"Random":[114],"Fields":[115],"significantly":[116],"substantially":[118],"outperform":[119],"other":[120],"techniques":[121],"sequence-based":[123],"(Hidden":[124],"Markov":[125],"Models)":[126],"non-sequence":[128],"based":[129,154],"machine":[130],"learning":[131],"(logistic":[132],"regression).":[133],"Included":[134],"analysis":[139],"perceive":[143],"major":[147],"failure":[148],"cases":[149],"performed":[153],"upon":[155],"sentence":[156],"labels.":[157]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
