{"id":"https://openalex.org/W4388032184","doi":"https://doi.org/10.48550/arxiv.2310.17714","title":"Nearest Neighbor Search over Vectorized Lexico-Syntactic Patterns for Relation Extraction from Financial Documents","display_name":"Nearest Neighbor Search over Vectorized Lexico-Syntactic Patterns for Relation Extraction from Financial Documents","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4388032184","doi":"https://doi.org/10.48550/arxiv.2310.17714"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2310.17714","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.17714","pdf_url":"https://arxiv.org/pdf/2310.17714","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2310.17714","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084786836","display_name":"Pawan Kumar Rajpoot","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rajpoot, Pawan Kumar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5109937799","display_name":"Ankur P. Parikh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Parikh, Ankur","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5084786836"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9794999957084656,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7667523622512817},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.7025272846221924},{"id":"https://openalex.org/keywords/relationship-extraction","display_name":"Relationship extraction","score":0.6260902285575867},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5624818801879883},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.530285656452179},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5015664100646973},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4775006175041199},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.4219702482223511},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37230998277664185},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3411575257778168},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08665362000465393}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7667523622512817},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.7025272846221924},{"id":"https://openalex.org/C153604712","wikidata":"https://www.wikidata.org/wiki/Q7310755","display_name":"Relationship extraction","level":3,"score":0.6260902285575867},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5624818801879883},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.530285656452179},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5015664100646973},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4775006175041199},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.4219702482223511},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37230998277664185},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3411575257778168},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08665362000465393},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2310.17714","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.17714","pdf_url":"https://arxiv.org/pdf/2310.17714","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2310.17714","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2310.17714","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2310.17714","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.17714","pdf_url":"https://arxiv.org/pdf/2310.17714","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4388032184.pdf","grobid_xml":"https://content.openalex.org/works/W4388032184.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2976808399","https://openalex.org/W2609844752","https://openalex.org/W2981341912","https://openalex.org/W4285246823","https://openalex.org/W4226278302","https://openalex.org/W4385734297","https://openalex.org/W4221160509","https://openalex.org/W2547211086","https://openalex.org/W2538200646","https://openalex.org/W2888033806"],"abstract_inverted_index":{"Relation":[0],"extraction":[1],"(RE)":[2],"has":[3],"achieved":[4],"remarkable":[5],"progress":[6],"with":[7,69],"the":[8,139,169],"help":[9],"of":[10,21,88,100,127,176],"pre-trained":[11],"language":[12,33,55],"models.":[13],"However,":[14],"existing":[15],"RE":[16],"models":[17,42,56],"are":[18,43,178],"usually":[19],"incapable":[20],"handling":[22],"two":[23],"situations:":[24],"implicit":[25,70],"expressions":[26],"and":[27,35,41,102,130,148,180],"long-tail":[28],"relation":[29],"classes,":[30],"caused":[31],"by":[32],"complexity":[34],"data":[36],"sparsity.":[37],"Further,":[38],"these":[39],"approaches":[40],"largely":[44],"inaccessible":[45],"to":[46,53,95,137],"users":[47],"who":[48],"don't":[49],"have":[50],"direct":[51],"access":[52],"large":[54],"(LLMs)":[57],"and/or":[58],"infrastructure":[59],"for":[60,166],"supervised":[61],"training":[62,115],"or":[63],"fine-tuning.":[64],"Rule-based":[65],"systems":[66,97],"also":[67,183],"struggle":[68],"expressions.":[71],"Apart":[72],"from":[73],"this,":[74],"Real":[75],"world":[76],"financial":[77],"documents":[78],"such":[79],"as":[80],"various":[81],"10-X":[82],"reports":[83],"(including":[84],"10-K,":[85],"10-Q,":[86],"etc.)":[87],"publicly":[89],"traded":[90],"companies":[91],"pose":[92],"another":[93],"challenge":[94],"rule-based":[96],"in":[98,168],"terms":[99],"longer":[101],"complex":[103],"sentences.":[104],"In":[105],"this":[106],"paper,":[107],"we":[108],"introduce":[109],"a":[110,121,132,163,173],"simple":[111,133],"approach":[112,145],"that":[113,150,159],"consults":[114],"relations":[116],"at":[117],"test":[118],"time":[119],"through":[120],"nearest-neighbor":[122],"search":[123],"over":[124],"dense":[125],"vectors":[126],"lexico-syntactic":[128],"patterns":[129],"provides":[131],"yet":[134],"effective":[135],"means":[136],"tackle":[138],"above":[140],"issues.":[141],"We":[142,156],"evaluate":[143],"our":[144,151],"on":[146],"REFinD":[147],"show":[149,158],"method":[152],"achieves":[153],"state-of-the-art":[154],"performance.":[155],"further":[157],"it":[160,181],"can":[161,188],"provide":[162,189],"good":[164],"start":[165],"human":[167],"loop":[170],"setup":[171],"when":[172,185],"small":[174],"number":[175],"annotations":[177],"available":[179],"is":[182],"beneficial":[184],"domain":[186],"experts":[187],"high":[190],"quality":[191],"patterns.":[192]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
