{"id":"https://openalex.org/W2898335306","doi":"https://doi.org/10.1145/3276520","title":"FlashProfile: a framework for synthesizing data profiles","display_name":"FlashProfile: a framework for synthesizing data profiles","publication_year":2018,"publication_date":"2018-10-24","ids":{"openalex":"https://openalex.org/W2898335306","doi":"https://doi.org/10.1145/3276520","mag":"2898335306"},"language":"en","primary_location":{"id":"doi:10.1145/3276520","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3276520","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3276520","source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3276520","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Saswat Padhi","orcid":null},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Saswat Padhi","raw_affiliation_strings":["University of California at Los Angeles, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California at Los Angeles, USA","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Prateek Jain","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Prateek Jain","raw_affiliation_strings":["Microsoft Research Lab, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Lab, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Daniel Perelman","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Perelman","raw_affiliation_strings":["Microsoft, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Oleksandr Polozov","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Oleksandr Polozov","raw_affiliation_strings":["Microsoft Research, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Sumit Gulwani","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sumit Gulwani","raw_affiliation_strings":["Microsoft, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":null,"display_name":"Todd Millstein","orcid":null},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Todd Millstein","raw_affiliation_strings":["University of California at Los Angeles, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California at Los Angeles, USA","institution_ids":["https://openalex.org/I161318765"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I161318765"],"apc_list":null,"apc_paid":null,"fwci":2.4672,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.9201708,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"2","issue":"OOPSLA","first_page":"1","last_page":"28"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.2694999873638153,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.2694999873638153,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.11029999703168869,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.04500000178813934,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.6589000225067139},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.6098999977111816},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.5490000247955322},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5400000214576721},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5242000222206116},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4431000053882599},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.37369999289512634}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.850600004196167},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.6589000225067139},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.6098999977111816},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5490000247955322},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5400000214576721},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5242000222206116},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4555000066757202},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4431000053882599},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38510000705718994},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.37369999289512634},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35580000281333923},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.35429999232292175},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.3488999903202057},{"id":"https://openalex.org/C28076734","wikidata":"https://www.wikidata.org/wiki/Q63087","display_name":"Coreference","level":3,"score":0.3452000021934509},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C56288433","wikidata":"https://www.wikidata.org/wiki/Q58673","display_name":"Data manipulation language","level":2,"score":0.29510000348091125},{"id":"https://openalex.org/C2986991398","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntactic structure","level":3,"score":0.27469998598098755},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.25920000672340393}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3276520","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3276520","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3276520","source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1709.05725","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1709.05725","pdf_url":"https://arxiv.org/pdf/1709.05725","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3276520","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3276520","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3276520","source":{"id":"https://openalex.org/S4210216081","display_name":"Proceedings of the ACM on Programming Languages","issn_l":"2475-1421","issn":["2475-1421"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Programming Languages","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3143604296","display_name":null,"funder_award_id":"CCF-1527923","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320307764","display_name":"Microsoft","ror":"https://ror.org/00d0nc645"},{"id":"https://openalex.org/F4320308943","display_name":"Microsoft Research","ror":"https://ror.org/00d0nc645"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2898335306.pdf","grobid_xml":"https://content.openalex.org/works/W2898335306.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W760598031","https://openalex.org/W1647671624","https://openalex.org/W1679901020","https://openalex.org/W1694295455","https://openalex.org/W1791762382","https://openalex.org/W1987538593","https://openalex.org/W1989445634","https://openalex.org/W1992419399","https://openalex.org/W2009511046","https://openalex.org/W2022858489","https://openalex.org/W2049309829","https://openalex.org/W2056138823","https://openalex.org/W2060610732","https://openalex.org/W2067304854","https://openalex.org/W2129066856","https://openalex.org/W2144951274","https://openalex.org/W2149236697","https://openalex.org/W2153233077","https://openalex.org/W2171313960","https://openalex.org/W2238673293","https://openalex.org/W2240240997","https://openalex.org/W2494400401","https://openalex.org/W2496170334","https://openalex.org/W2499791918","https://openalex.org/W2911964244","https://openalex.org/W4229675450","https://openalex.org/W4237412827","https://openalex.org/W4249682906","https://openalex.org/W6668990524"],"related_works":[],"abstract_inverted_index":{"We":[0,98,121],"address":[1],"the":[2,23,27,51,65],"problem":[3,104],"of":[4,12,17,54,61,82,96,105,133,146,176,194],"learning":[5],"a":[6,10,15,79,103,123,130,143,149,172],"syntactic":[7,24,41,100,110,186],"profile":[8],"for":[9,125,138,190],"collection":[11],"strings,":[13],"i.e.":[14,196],"set":[16,81],"regex-like":[18],"patterns":[19,84,115],"that":[20,116,135,183],"succinctly":[21,117],"describe":[22,118],"variations":[25],"in":[26,39,70,200],"strings.":[28],"Real-world":[29],"datasets,":[30,169],"typically":[31],"curated":[32],"from":[33],"multiple":[34],"sources,":[35],"often":[36],"contain":[37],"data":[38,45,55,62],"various":[40],"formats.":[42],"Thus,":[43],"any":[44],"processing":[46],"task":[47],"is":[48,68],"preceded":[49],"by":[50,113,141],"critical":[52],"step":[53],"format":[56],"identification.":[57],"However,":[58],"manual":[59],"inspection":[60],"to":[63,78,185],"identify":[64],"different":[66],"formats":[67],"infeasible":[69],"standard":[71],"big-data":[72],"scenarios.":[73],"Prior":[74],"techniques":[75],"are":[76],"restricted":[77],"small":[80],"pre-defined":[83],"(e.g.":[85],"digits,":[86],"letters,":[87],"words":[88],"etc.),":[89],"and":[90],"provide":[91],"no":[92],"control":[93],"over":[94,129,165],"granularity":[95],"profiles.":[97],"define":[99],"profiling":[101,174],"as":[102,160,205],"clustering":[106],"strings":[107],"based":[108],"on":[109],"similarity,":[111],"followed":[112],"identifying":[114],"each":[119],"cluster.":[120],"present":[122],"technique":[124,159],"synthesizing":[126],"such":[127,204],"profiles":[128,187],"given":[131],"language":[132],"patterns,":[134],"also":[136],"allows":[137],"interactive":[139],"refinement":[140],"requesting":[142],"desired":[144],"number":[145],"clusters.":[147],"Using":[148],"state-of-the-art":[150],"inductive":[151],"synthesis":[152,193],"framework,":[153],"PROSE,":[154],"we":[155,170,181],"have":[156],"implemented":[157],"our":[158],"FlashProfile.":[161],"Across":[162],"153":[163],"tasks":[164],"75":[166],"large":[167],"real":[168],"observe":[171],"median":[173],"time":[175],"only":[177],"\u223c":[178],"0.7s.":[179],"Furthermore,":[180],"show":[182],"access":[184],"may":[188],"allow":[189],"more":[191],"accurate":[192],"programs,":[195],"using":[197],"fewer":[198],"examples,":[199],"programming-by-example":[201],"(PBE)":[202],"workflows":[203],"Flash":[206],"Fill.":[207]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2018-11-02T00:00:00"}
