{"id":"https://openalex.org/W2950844738","doi":"https://doi.org/10.18653/v1/p19-1592","title":"Optimal Transport-based Alignment of Learned Character Representations for String Similarity","display_name":"Optimal Transport-based Alignment of Learned Character Representations for String Similarity","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2950844738","doi":"https://doi.org/10.18653/v1/p19-1592","mag":"2950844738"},"language":"en","primary_location":{"id":"doi:10.18653/v1/p19-1592","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1592","pdf_url":"https://www.aclweb.org/anthology/P19-1592.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/P19-1592.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112879125","display_name":"Derek Tam","orcid":null},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Derek Tam","raw_affiliation_strings":["College of Information and Computer Sciences, University of Massachusetts Amherst"],"affiliations":[{"raw_affiliation_string":"College of Information and Computer Sciences, University of Massachusetts Amherst","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029504586","display_name":"Nicholas Monath","orcid":"https://orcid.org/0000-0002-5135-2423"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nicholas Monath","raw_affiliation_strings":["College of Information and Computer Sciences, University of Massachusetts Amherst"],"affiliations":[{"raw_affiliation_string":"College of Information and Computer Sciences, University of Massachusetts Amherst","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068953995","display_name":"Ari Kobren","orcid":null},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ari Kobren","raw_affiliation_strings":["College of Information and Computer Sciences, University of Massachusetts Amherst"],"affiliations":[{"raw_affiliation_string":"College of Information and Computer Sciences, University of Massachusetts Amherst","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016362319","display_name":"Aaron Traylor","orcid":null},"institutions":[{"id":"https://openalex.org/I175594653","display_name":"John Brown University","ror":"https://ror.org/02ct41q97","country_code":"US","type":"education","lineage":["https://openalex.org/I175594653"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aaron Traylor","raw_affiliation_strings":["Department of Computer Science, Brown University"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Brown University","institution_ids":["https://openalex.org/I175594653"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106307747","display_name":"Rajarshi Das","orcid":"https://orcid.org/0009-0009-9348-5265"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rajarshi Das","raw_affiliation_strings":["College of Information and Computer Sciences, University of Massachusetts Amherst"],"affiliations":[{"raw_affiliation_string":"College of Information and Computer Sciences, University of Massachusetts Amherst","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107835063","display_name":"Andrew McCallum","orcid":null},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew McCallum","raw_affiliation_strings":["College of Information and Computer Sciences, University of Massachusetts Amherst"],"affiliations":[{"raw_affiliation_string":"College of Information and Computer Sciences, University of Massachusetts Amherst","institution_ids":["https://openalex.org/I24603500"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5112879125"],"corresponding_institution_ids":["https://openalex.org/I24603500"],"apc_list":null,"apc_paid":null,"fwci":1.7341,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.88632515,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"5907","last_page":"5917"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.743096113204956},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.7294759154319763},{"id":"https://openalex.org/keywords/coreference","display_name":"Coreference","score":0.7294164896011353},{"id":"https://openalex.org/keywords/alias","display_name":"Alias","score":0.7154896259307861},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.7153948545455933},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.6164587736129761},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.557031512260437},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5455316305160522},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5322245359420776},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.530414879322052},{"id":"https://openalex.org/keywords/string-metric","display_name":"String metric","score":0.49549996852874756},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.434757262468338},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.369437038898468},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.337058424949646},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33422690629959106},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32199546694755554},{"id":"https://openalex.org/keywords/resolution","display_name":"Resolution (logic)","score":0.31857913732528687},{"id":"https://openalex.org/keywords/string-searching-algorithm","display_name":"String searching algorithm","score":0.2674143314361572},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.26590263843536377},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13942357897758484},{"id":"https://openalex.org/keywords/pattern-matching","display_name":"Pattern matching","score":0.11424776911735535},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.08026859164237976}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.743096113204956},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.7294759154319763},{"id":"https://openalex.org/C28076734","wikidata":"https://www.wikidata.org/wiki/Q63087","display_name":"Coreference","level":3,"score":0.7294164896011353},{"id":"https://openalex.org/C46681722","wikidata":"https://www.wikidata.org/wiki/Q4725589","display_name":"Alias","level":2,"score":0.7154896259307861},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.7153948545455933},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.6164587736129761},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.557031512260437},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5455316305160522},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5322245359420776},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.530414879322052},{"id":"https://openalex.org/C22820288","wikidata":"https://www.wikidata.org/wiki/Q9050568","display_name":"String metric","level":4,"score":0.49549996852874756},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.434757262468338},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.369437038898468},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.337058424949646},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33422690629959106},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32199546694755554},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.31857913732528687},{"id":"https://openalex.org/C7757238","wikidata":"https://www.wikidata.org/wiki/Q374040","display_name":"String searching algorithm","level":3,"score":0.2674143314361572},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26590263843536377},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13942357897758484},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.11424776911735535},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.08026859164237976},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/p19-1592","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1592","pdf_url":"https://www.aclweb.org/anthology/P19-1592.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/p19-1592","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1592","pdf_url":"https://www.aclweb.org/anthology/P19-1592.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1667745721","display_name":"DMREF: Collaborative Research: The Synthesis Genome: Data Mining for Synthesis of New Materials","funder_award_id":"1534431","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2011866053","display_name":"III: Medium: Constructing Knowledge Bases by Extracting Entity-Relations and Meanings from Natural Language via \"Universal Schema\"","funder_award_id":"1514053","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2345705215","display_name":null,"funder_award_id":"DMR-1534431","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G414571346","display_name":null,"funder_award_id":"FA8750-13-2-0020","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G4713059963","display_name":null,"funder_award_id":"FA8750","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G5217516660","display_name":null,"funder_award_id":"IIS-1514053","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5921281487","display_name":null,"funder_award_id":"number","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320315474","display_name":"Chan Zuckerberg Initiative","ror":"https://ror.org/02qenvm24"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320332749","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521"},{"id":"https://openalex.org/F4320337367","display_name":"Division of Materials Research","ror":"https://ror.org/01pc7k308"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2950844738.pdf","grobid_xml":"https://content.openalex.org/works/W2950844738.grobid-xml"},"referenced_works_count":65,"referenced_works":["https://openalex.org/W509898","https://openalex.org/W46452414","https://openalex.org/W196214544","https://openalex.org/W385466589","https://openalex.org/W658020064","https://openalex.org/W1502916507","https://openalex.org/W1522301498","https://openalex.org/W1526729636","https://openalex.org/W1567679865","https://openalex.org/W1577500738","https://openalex.org/W1828163288","https://openalex.org/W1837702027","https://openalex.org/W1840435438","https://openalex.org/W1938755728","https://openalex.org/W1992442866","https://openalex.org/W2029605947","https://openalex.org/W2064675550","https://openalex.org/W2074231493","https://openalex.org/W2079735306","https://openalex.org/W2087064593","https://openalex.org/W2094728533","https://openalex.org/W2109227373","https://openalex.org/W2112796928","https://openalex.org/W2131193521","https://openalex.org/W2133564696","https://openalex.org/W2140310134","https://openalex.org/W2158131535","https://openalex.org/W2161462756","https://openalex.org/W2164456230","https://openalex.org/W2167146316","https://openalex.org/W2231488453","https://openalex.org/W2234124417","https://openalex.org/W2251827463","https://openalex.org/W2293260246","https://openalex.org/W2338325072","https://openalex.org/W2405925169","https://openalex.org/W2413794162","https://openalex.org/W2470595162","https://openalex.org/W2556873163","https://openalex.org/W2561391765","https://openalex.org/W2586050494","https://openalex.org/W2593825114","https://openalex.org/W2765285316","https://openalex.org/W2787434568","https://openalex.org/W2793477525","https://openalex.org/W2810676764","https://openalex.org/W2949847915","https://openalex.org/W2951559648","https://openalex.org/W2963237040","https://openalex.org/W2963403868","https://openalex.org/W2963405869","https://openalex.org/W2963472233","https://openalex.org/W2963486652","https://openalex.org/W2963506485","https://openalex.org/W2963522640","https://openalex.org/W2963846996","https://openalex.org/W2963965928","https://openalex.org/W2964121744","https://openalex.org/W2964308564","https://openalex.org/W2964316651","https://openalex.org/W3030787301","https://openalex.org/W3122640307","https://openalex.org/W4230521134","https://openalex.org/W4298333447","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2034042778","https://openalex.org/W2793653273","https://openalex.org/W4206408938","https://openalex.org/W4301653298","https://openalex.org/W2119262534","https://openalex.org/W1970026646","https://openalex.org/W152351265","https://openalex.org/W3042295250","https://openalex.org/W2994651536","https://openalex.org/W4232755842"],"abstract_inverted_index":{"String":[0],"similarity":[1,23,106],"models":[2,107],"are":[3],"vital":[4],"for":[5,20],"record":[6],"linkage,":[7],"entity":[8],"resolution,":[9],"and":[10,50,103,132],"search.":[11],"In":[12],"this":[13],"work,":[14],"we":[15,75],"present":[16],"STANCE-a":[17],"learned":[18],"model":[19],"computing":[21],"the":[22,30,36,52,71,111,147],"of":[24,32,47,97,110,129],"two":[25,66],"strings.":[26],"Our":[27],"approach":[28],"encodes":[29],"characters":[31],"each":[33],"string,":[34],"aligns":[35],"encodings":[37],"using":[38],"Sinkhorn":[39],"Iteration":[40],"(alignment":[41],"is":[42],"posed":[43],"as":[44],"an":[45,127],"instance":[46,128],"optimal":[48],"transport)":[49],"scores":[51],"alignment":[53],"with":[54],"a":[55,138],"convolutional":[56],"neural":[57],"network.":[58],"We":[59,79,91,114],"evaluate":[60],"STANCE's":[61,117],"ability":[62,118],"to":[63,70,119,126,137],"detect":[64],"whether":[65],"strings":[67],"can":[68],"refer":[69],"same":[72],"entity-a":[73],"task":[74],"term":[76],"alias":[77,83],"detection.":[78],"construct":[80],"five":[81,112],"new":[82],"detection":[84],"datasets":[85],"(and":[86],"make":[87],"them":[88],"publicly":[89],"available).":[90],"show":[92,133],"that":[93,134],"STANCE":[94],"(or":[95],"one":[96],"its":[98],"variants)":[99],"outperforms":[100],"both":[101],"state-ofthe-art":[102],"classic,":[104],"parameter-free":[105],"on":[108],"four":[109],"datasets.":[113],"also":[115],"demonstrate":[116],"improve":[120],"downstream":[121],"tasks":[122],"by":[123],"applying":[124],"it":[125,135],"cross-document":[130],"coreference":[131],"leads":[136],"2.8":[139],"point":[140],"improvement":[141],"in":[142],"B":[143],"3":[144],"F1":[145],"over":[146],"previous":[148],"state-of-the-art":[149],"approach.":[150]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
