{"id":"https://openalex.org/W2058494157","doi":"https://doi.org/10.1145/1247480.1247599","title":"Building statistical models and scoring with UDFs","display_name":"Building statistical models and scoring with UDFs","publication_year":2007,"publication_date":"2007-06-11","ids":{"openalex":"https://openalex.org/W2058494157","doi":"https://doi.org/10.1145/1247480.1247599","mag":"2058494157"},"language":"en","primary_location":{"id":"doi:10.1145/1247480.1247599","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1247480.1247599","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2007 ACM SIGMOD international conference on Management of data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031610238","display_name":"Carlos Ordo\u0144\u1ebdz","orcid":"https://orcid.org/0009-0005-1135-9726"},"institutions":[{"id":"https://openalex.org/I44461941","display_name":"University of Houston","ror":"https://ror.org/048sx0r50","country_code":"US","type":"education","lineage":["https://openalex.org/I44461941"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Carlos Ordonez","raw_affiliation_strings":["University of Houston, Houston, TX","University of Houston , Houston, TX"],"affiliations":[{"raw_affiliation_string":"University of Houston, Houston, TX","institution_ids":["https://openalex.org/I44461941"]},{"raw_affiliation_string":"University of Houston , Houston, TX","institution_ids":["https://openalex.org/I44461941"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5031610238"],"corresponding_institution_ids":["https://openalex.org/I44461941"],"apc_list":null,"apc_paid":null,"fwci":7.4679,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.96778705,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1005","last_page":"1016"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7715237140655518},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5376784205436707},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.5234874486923218},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.49941182136535645},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.4765932261943817},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.4622836410999298},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.45323941111564636},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.373763769865036},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10885775089263916},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.10626605153083801}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7715237140655518},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5376784205436707},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.5234874486923218},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.49941182136535645},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.4765932261943817},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.4622836410999298},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.45323941111564636},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.373763769865036},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10885775089263916},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.10626605153083801},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1247480.1247599","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1247480.1247599","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2007 ACM SIGMOD international conference on Management of data","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.701.1838","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.701.1838","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www2.cs.uh.edu/%7Eordonez/w-pdf/w-2007-SIGMOD-udfmvs.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4000000059604645,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W147860157","https://openalex.org/W1480376833","https://openalex.org/W1487902141","https://openalex.org/W1547939638","https://openalex.org/W1573373104","https://openalex.org/W1977496278","https://openalex.org/W1991271936","https://openalex.org/W1993626269","https://openalex.org/W1996006113","https://openalex.org/W2033139852","https://openalex.org/W2063680791","https://openalex.org/W2064803206","https://openalex.org/W2067650870","https://openalex.org/W2095897464","https://openalex.org/W2102951036","https://openalex.org/W2103139809","https://openalex.org/W2121737596","https://openalex.org/W2143979434","https://openalex.org/W2145829184","https://openalex.org/W2166559705","https://openalex.org/W2206925937","https://openalex.org/W2751199251","https://openalex.org/W3143471909","https://openalex.org/W4235011515","https://openalex.org/W4248008732"],"related_works":["https://openalex.org/W2948670949","https://openalex.org/W4288047943","https://openalex.org/W2804364458","https://openalex.org/W4232484699","https://openalex.org/W4298130764","https://openalex.org/W2132641928","https://openalex.org/W4310225030","https://openalex.org/W2090259340","https://openalex.org/W2083665254","https://openalex.org/W2990655940"],"abstract_inverted_index":{"Multidimensional":[0],"statistical":[1,19],"models":[2,20,230],"are":[3,21,41,63,74,95,122,212,219],"generally":[4,123],"computed":[5,22],"outside":[6,145,205,239],"a":[7,27,44,77,81,86,90,171,183,244],"relational":[8],"DBMS,":[9],"exporting":[10],"data":[11,46,82,128,190,265],"sets.":[12,191,266],"This":[13],"article":[14],"explains":[15],"how":[16,56,132,151],"fundamental":[17],"multidimensional":[18],"inside":[23,199],"the":[24,66,106,112,127,133,139,146,200,233,240],"DBMS":[25,241],"in":[26,43,170,242],"single":[28,172],"table":[29,173,257],"scan":[30],"exploiting":[31],"SQL":[32,159,196,215],"and":[33,61,79,100,111,182,195,217,248,253],"User-Defined":[34],"Functions":[35],"(UDFs).":[36],"The":[37],"techniques":[38],"described":[39],"herein":[40],"used":[42],"commercial":[45],"mining":[47],"tool,":[48],"called":[49],"Teradata":[50,67],"Warehouse":[51],"Miner.":[52],"Specifically,":[53],"we":[54,130,162],"explain":[55,131,150],"correlation,":[57],"linear":[58,104,107],"regression,":[59],"PCA":[60],"clustering,":[62],"integrated":[64],"into":[65],"DBMS.":[68,147],"Two":[69],"major":[70],"database":[71],"processing":[72],"tasks":[73],"discussed:":[75],"building":[76],"model":[78,91,140],"scoring":[80],"set":[83,184],"based":[84,231],"on":[85,206,232],"model.":[87],"To":[88],"build":[89,138],"two":[92,164],"summary":[93,155,180,234],"matrices":[94,121,156,181,235],"shown":[96],"to":[97,137,152,178,188,225,262],"be":[98,142,237],"common":[99],"essential":[101],"for":[102],"all":[103],"models:":[105],"sum":[108,114],"of":[109,115,117,166,185],"points":[110],"quadratic":[113],"cross-products":[116],"points.":[118],"Since":[119],"such":[120],"significantly":[124],"smaller":[125],"than":[126,214,222],"set,":[129],"remaining":[134],"matrix":[135],"operations":[136],"can":[141,236],"quickly":[143],"performed":[144],"We":[148],"first":[149],"efficiently":[153],"compute":[154,179],"with":[157,202],"plain":[158],"queries.":[160],"Then":[161],"present":[163],"sets":[165],"UDFs":[167,187,194,211,218,250],"that":[168],"work":[169],"scan:":[174],"an":[175],"aggregate":[176],"UDF":[177],"scalar":[186,249],"score":[189],"Experiments":[192],"compare":[193],"queries":[197,216],"(running":[198,204],"DBMS)":[201],"C++":[203],"exported":[207],"files).":[208],"In":[209],"general,":[210],"faster":[213],"more":[220],"efficient":[221],"C++,":[223],"due":[224],"long":[226],"export":[227],"times.":[228],"Statistical":[229],"built":[238],"just":[243],"few":[245],"seconds.":[246],"Aggregate":[247],"scale":[251],"linearly":[252],"require":[254],"only":[255],"one":[256],"scan,":[258],"making":[259],"them":[260],"ideal":[261],"process":[263],"large":[264]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":3},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
