{"id":"https://openalex.org/W7123820443","doi":"https://doi.org/10.1145/3769126.3769205","title":"Assessing the performance gap between lexical and semantic models for information retrieval with formulaic legal language","display_name":"Assessing the performance gap between lexical and semantic models for information retrieval with formulaic legal language","publication_year":2025,"publication_date":"2025-06-16","ids":{"openalex":"https://openalex.org/W7123820443","doi":"https://doi.org/10.1145/3769126.3769205"},"language":null,"primary_location":{"id":"doi:10.1145/3769126.3769205","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3769126.3769205","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twentieth International Conference on Artificial Intelligence and Law","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3769126.3769205","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040427931","display_name":"Larissa Mori","orcid":null},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Larissa Mori","raw_affiliation_strings":["Edwardson School of Industrial Engineering, Purdue University, West Lafayette, Indiana, USA"],"raw_orcid":"https://orcid.org/0009-0003-4321-0403","affiliations":[{"raw_affiliation_string":"Edwardson School of Industrial Engineering, Purdue University, West Lafayette, Indiana, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011775253","display_name":"Carlos Sousa Oliveira","orcid":"https://orcid.org/0000-0002-5967-7126"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carlos Sousa de Oliveira","raw_affiliation_strings":["Edwardson School of Industrial Engineering, Purdue University, West Lafayette, Indiana, USA"],"raw_orcid":"https://orcid.org/0009-0007-8834-1385","affiliations":[{"raw_affiliation_string":"Edwardson School of Industrial Engineering, Purdue University, West Lafayette, Indiana, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000608521","display_name":"Yuehwern Yih","orcid":"https://orcid.org/0000-0003-2087-7718"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuehwern Yih","raw_affiliation_strings":["Edwardson School of Industrial Engineering, Purdue University, West Lafayette, Indiana, USA"],"raw_orcid":"https://orcid.org/0000-0003-2087-7718","affiliations":[{"raw_affiliation_string":"Edwardson School of Industrial Engineering, Purdue University, West Lafayette, Indiana, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017375445","display_name":"Mario Ventresca","orcid":"https://orcid.org/0000-0002-1246-297X"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mario Ventresca","raw_affiliation_strings":["Edwardson School of Industrial Engineering, Purdue University, West Lafayette, Indiana, USA"],"raw_orcid":"https://orcid.org/0000-0002-1246-297X","affiliations":[{"raw_affiliation_string":"Edwardson School of Industrial Engineering, Purdue University, West Lafayette, Indiana, USA","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5040427931"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.81409474,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"114","last_page":"128"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6258999705314636,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6258999705314636,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.11339999735355377,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.028200000524520874,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5782999992370605},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.45159998536109924},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.426800012588501},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4242999851703644},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3686000108718872},{"id":"https://openalex.org/keywords/semantic-data-model","display_name":"Semantic data model","score":0.3400999903678894},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.30720001459121704},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.30480000376701355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8159000277519226},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6866000294685364},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.588100016117096},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5782999992370605},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.45159998536109924},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4503999948501587},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.426800012588501},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4242999851703644},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3686000108718872},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.34540000557899475},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.30480000376701355},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C198942812","wikidata":"https://www.wikidata.org/wiki/Q496618","display_name":"Semantic property","level":2,"score":0.3021000027656555},{"id":"https://openalex.org/C2777946921","wikidata":"https://www.wikidata.org/wiki/Q7449044","display_name":"Semantic analysis (machine learning)","level":2,"score":0.30059999227523804},{"id":"https://openalex.org/C67277372","wikidata":"https://www.wikidata.org/wiki/Q7449085","display_name":"Semantic role labeling","level":3,"score":0.2881999909877777},{"id":"https://openalex.org/C89686163","wikidata":"https://www.wikidata.org/wiki/Q1187982","display_name":"Vector space model","level":2,"score":0.2872999906539917},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C173862523","wikidata":"https://www.wikidata.org/wiki/Q5421270","display_name":"Explicit semantic analysis","level":5,"score":0.26420000195503235},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2563999891281128},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3769126.3769205","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3769126.3769205","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twentieth International Conference on Artificial Intelligence and Law","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3769126.3769205","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3769126.3769205","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twentieth International Conference on Artificial Intelligence and Law","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7110444903373718,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2788335187","https://openalex.org/W2896457183","https://openalex.org/W3099950029","https://openalex.org/W3128487045","https://openalex.org/W3156636935","https://openalex.org/W3214042884","https://openalex.org/W4252076394","https://openalex.org/W4386517708","https://openalex.org/W4389571374","https://openalex.org/W4402672062"],"related_works":[],"abstract_inverted_index":{"Legal":[0],"passage":[1],"retrieval":[2,78,105,120],"is":[3,48,74,101,185],"an":[4],"important":[5],"task":[6,27],"that":[7,80,108,139,183],"assists":[8],"legal":[9,21,30,72,89,99],"practitioners":[10],"in":[11,147,163,176,193,214,228],"the":[12,26,37,42,68,160,220,223,231],"time-intensive":[13],"process":[14],"of":[15,28,36,39,41,71,153,196,222,225],"finding":[16],"relevant":[17],"precedents":[18],"to":[19,54,76,123,209,243],"support":[20],"arguments.":[22],"This":[23],"study":[24],"investigates":[25],"retrieving":[29],"passages":[31],"or":[32,60,118],"paragraphs":[33],"from":[34],"decisions":[35],"Court":[38],"Justice":[40],"European":[43],"Union":[44],"(CJEU),":[45],"whose":[46],"language":[47,73,100],"highly":[49],"structured":[50],"and":[51,85,112,126,131,142,169,175,217,234,240],"formulaic,":[52],"leading":[53],"repetitive":[55,69,151],"patterns.":[56],"Understanding":[57],"when":[58,96],"lexical":[59,111,141],"semantic":[61,125],"models":[62,121,144,162,192],"are":[63,81,172,246],"more":[64,82,150,164],"effective":[65],"at":[66],"handling":[67],"nature":[70],"key":[75],"developing":[77],"systems":[79],"accurate,":[83],"efficient,":[84],"transparent":[86],"for":[87,104],"specific":[88],"domains.":[90],"To":[91],"this":[92,97,244],"end,":[93],"we":[94,218],"explore":[95],"routinized":[98],"better":[102,158],"suited":[103],"using":[106],"methods":[107],"rely":[109],"on":[110,205,230],"statistical":[113],"features,":[114],"such":[115],"as":[116],"BM25,":[117],"dense":[119,143,161,191,203],"trained":[122],"capture":[124],"contextual":[127],"information.":[128],"A":[129],"qualitative":[130],"quantitative":[132],"analysis":[133],"with":[134,149],"three":[135],"complementary":[136],"metrics":[137],"shows":[138],"both":[140],"perform":[145],"well":[146],"scenarios":[148,166],"usage":[152],"language,":[154],"whereas":[155],"BM25":[156,184,213],"performs":[157],"than":[159],"nuanced":[165],"where":[167],"repetition":[168],"verbatim":[170],"quotes":[171],"less":[173],"prevalent":[174],"longer":[177],"queries.":[178],"Our":[179],"experiments":[180],"also":[181],"show":[182],"a":[186,202],"strong":[187],"baseline,":[188],"surpassing":[189,212],"off-the-shelf":[190],"4":[194],"out":[195],"7":[197],"performance":[198,233],"metrics.":[199],"However,":[200],"fine-tuning":[201,229],"model":[204],"domain-specific":[206],"data":[207,226],"led":[208],"improved":[210],"performance,":[211],"most":[215],"metrics,":[216],"analyze":[219],"effect":[221],"amount":[224],"used":[227],"model\u2019s":[232],"temporal":[235],"robustness.":[236],"The":[237],"code,":[238],"dataset":[239],"appendix":[241],"related":[242],"work":[245],"available":[247],"on:":[248],"https://github.com/larimo/lexsem-legal-ir.":[249]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-01-14T00:00:00"}
