{"id":"https://openalex.org/W4379534108","doi":"https://doi.org/10.1145/3591287","title":"Repairing Regular Expressions for Extraction","display_name":"Repairing Regular Expressions for Extraction","publication_year":2023,"publication_date":"2023-06-06","ids":{"openalex":"https://openalex.org/W4379534108","doi":"https://doi.org/10.1145/3591287"},"language":"en","primary_location":{"id":"doi:10.1145/3591287","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3591287","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3591287","source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3591287","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063355691","display_name":"Nariyoshi Chida","orcid":"https://orcid.org/0000-0002-9542-9234"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Nariyoshi Chida","raw_affiliation_strings":["NTT Social Informatics Laboratories, Japan / Waseda University, Japan"],"raw_orcid":"https://orcid.org/0000-0002-9542-9234","affiliations":[{"raw_affiliation_string":"NTT Social Informatics Laboratories, Japan / Waseda University, Japan","institution_ids":["https://openalex.org/I150744194"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031661604","display_name":"Tachio Terauchi","orcid":"https://orcid.org/0000-0001-5305-4916"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tachio Terauchi","raw_affiliation_strings":["Waseda University, Japan"],"raw_orcid":"https://orcid.org/0000-0001-5305-4916","affiliations":[{"raw_affiliation_string":"Waseda University, Japan","institution_ids":["https://openalex.org/I150744194"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5063355691"],"corresponding_institution_ids":["https://openalex.org/I150744194"],"apc_list":null,"apc_paid":null,"fwci":1.8692,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.90502183,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"7","issue":"PLDI","first_page":"1633","last_page":"1656"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regular-expression","display_name":"Regular expression","score":0.7667235732078552},{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.7082101702690125},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6754688620567322},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5796569585800171},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.49181821942329407},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.4606376588344574},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4530683159828186},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.439378023147583},{"id":"https://openalex.org/keywords/backtracking","display_name":"Backtracking","score":0.4294711947441101},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.255973219871521},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2383880913257599}],"concepts":[{"id":"https://openalex.org/C121329065","wikidata":"https://www.wikidata.org/wiki/Q185612","display_name":"Regular expression","level":2,"score":0.7667235732078552},{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.7082101702690125},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6754688620567322},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5796569585800171},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49181821942329407},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.4606376588344574},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4530683159828186},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.439378023147583},{"id":"https://openalex.org/C156884757","wikidata":"https://www.wikidata.org/wiki/Q798554","display_name":"Backtracking","level":2,"score":0.4294711947441101},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.255973219871521},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2383880913257599},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3591287","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3591287","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3591287","source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3591287","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3591287","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3591287","source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3816610186","display_name":null,"funder_award_id":"JP20K20625","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G5444716433","display_name":null,"funder_award_id":"JP22H03570","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G7584280688","display_name":null,"funder_award_id":"JP20H04162","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4379534108.pdf","grobid_xml":"https://content.openalex.org/works/W4379534108.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W1574255194","https://openalex.org/W2018706164","https://openalex.org/W2031830821","https://openalex.org/W2059383863","https://openalex.org/W2080666934","https://openalex.org/W2108543348","https://openalex.org/W2275294428","https://openalex.org/W2401610261","https://openalex.org/W2611059838","https://openalex.org/W2897675441","https://openalex.org/W2899462170","https://openalex.org/W2968339949","https://openalex.org/W2979577724","https://openalex.org/W2981296933","https://openalex.org/W2999135213","https://openalex.org/W3011649674","https://openalex.org/W3033889389","https://openalex.org/W3090797304","https://openalex.org/W3094252362","https://openalex.org/W3099242157","https://openalex.org/W3123705249","https://openalex.org/W3163659631","https://openalex.org/W3212912612","https://openalex.org/W4287634532","https://openalex.org/W6891610616"],"related_works":["https://openalex.org/W1583922594","https://openalex.org/W2998448420","https://openalex.org/W1974038726","https://openalex.org/W1953626159","https://openalex.org/W1570166875","https://openalex.org/W2170798490","https://openalex.org/W2024152506","https://openalex.org/W2473935755","https://openalex.org/W4281482860","https://openalex.org/W639355536"],"abstract_inverted_index":{"While":[0],"synthesizing":[1,24,53,186],"and":[2,31,54,70,79,174,187,287,312,322,338,355,367,385],"repairing":[3,26,55,188],"regular":[4],"expressions":[5],"(regexes)":[6],"based":[7,216],"on":[8,182,217,239,370],"Programming-by-Examples":[9],"(PBE)":[10],"methods":[11,184],"have":[12,64,149,351],"seen":[13],"rapid":[14],"progress":[15],"in":[16,134,211,308,347,358],"recent":[17],"years,":[18],"all":[19],"existing":[20,183],"works":[21,214],"only":[22],"support":[23,33],"or":[25],"regexes":[27,56,62,97,189,372],"for":[28,34,52,57,140,177,185,190,227,231,325,365],"membership":[29,142,191,228],"testing,":[30,192],"the":[32,44,48,76,92,95,103,111,116,128,138,141,158,164,171,194,208,212,240,249,261,295,306,309,313,344,348,353,380,383,393],"extraction":[35,345],"is":[36,107,125,133,146,166,225,315],"still":[37],"an":[38,167,277],"open":[39],"problem.":[40,81],"This":[41,132],"paper":[42,165],"fills":[43],"void":[45],"by":[46,153],"proposing":[47],"first":[49,270],"PBE-based":[50,77,172],"method":[51,258],"extraction.":[58,178],"Our":[59,179,377],"work":[60],"supports":[61],"that":[63,86,113,259,340,373,386],"real-world":[65,375],"extensions":[66,73,112,129,204],"such":[67],"as":[68,281,290],"backreferences":[69],"lookarounds.":[71],"The":[72,160,301],"significantly":[74],"affect":[75],"synthesis":[78,173,354],"repair":[80,175,356],"In":[82],"fact,":[83],"we":[84,251,269],"show":[85],"there":[87,106],"are":[88,98,130,205,215,236],"unsolvable":[89],"instances":[90],"of":[91,119,163,221,243,264,276,319,343,382],"problem":[93,123,176],"if":[94,127],"synthesized":[96],"not":[99,230],"allowed":[100],"to":[101,137,148,169,293,304,316],"use":[102,288,323,342],"extensions,":[104],"i.e.,":[105,193],"no":[108],"regex":[109,156,245,265,279,320],"without":[110,157],"correctly":[114],"classify":[115],"given":[117],"set":[118],"examples,":[120],"whereas":[121],"every":[122,144],"instance":[124,145],"solvable":[126],"allowed.":[131],"stark":[135],"contrast":[136],"case":[139],"where":[143],"guaranteed":[147],"a":[150,154,218,253,272,282,291,359],"solution":[151],"expressible":[152],"pure":[155],"extensions.":[159,376],"main":[161],"contribution":[162],"algorithm":[168,180,357,384],"solve":[170],"builds":[181],"enumerative":[195],"search":[196,332,394],"algorithms":[197],"with":[198],"SMT":[199,209,255,297],"constraint":[200,256,298],"solving.":[201],"However,":[202],"significant":[203],"needed":[206],"because":[207,233],"constraints":[210,314],"previous":[213],"non-deterministic":[219],"semantics":[220,224,275,311],"regexes.":[222],"Non-deterministic":[223],"sound":[226],"but":[229],"extraction,":[232],"which":[234],"substrings":[235],"extracted":[237],"depends":[238],"deterministic":[241,262,283],"behavior":[242,263],"actual":[244,278],"engines.":[246,266],"To":[247],"address":[248],"issue,":[250],"propose":[252,329],"new":[254,296,331,388],"generation":[257,299],"respects":[260],"For":[267],"this,":[268],"define":[271],"novel":[273],"formal":[274,310],"engine":[280],"big-step":[284],"operational":[285],"semantics,":[286],"it":[289,369],"basis":[292],"design":[294],"method.":[300],"key":[302],"idea":[303],"simulate":[305],"determinism":[307],"consider":[317],"continuations":[318],"matching":[321],"them":[324],"disambiguation.":[326],"We":[327,350],"also":[328],"two":[330],"space":[333],"pruning":[334,389],"techniques":[335,390],"called":[336,361],"approximation-by-pure-regex":[337],"approximation-by-backreferences":[339],"make":[341],"information":[346],"examples.":[349],"implemented":[352],"tool":[360],"R3":[362],"(Repairing":[363],"Regex":[364],"extRaction)":[366],"evaluated":[368],"50":[371],"contain":[374],"evaluation":[378],"shows":[379],"effectiveness":[381],"our":[387],"substantially":[391],"prune":[392],"space.":[395]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
