From 4918a17e686135603393bfd0b81c33405738cc9c Mon Sep 17 00:00:00 2001 From: David Duvenaud Date: Fri, 4 Mar 2022 18:29:31 -0500 Subject: [PATCH 1/4] More work on types for tables examples. --- examples/data-frames.dx | 494 ++++++++++++++++------------------------ 1 file changed, 202 insertions(+), 292 deletions(-) diff --git a/examples/data-frames.dx b/examples/data-frames.dx index 8dd87750a..e495a88d3 100644 --- a/examples/data-frames.dx +++ b/examples/data-frames.dx @@ -1,6 +1,6 @@ -'# Example tables +import set --- TODO: Index those tables by the string elements or by integers? +'# Example tables def students : (Fin 3)=>{name: String & age: Int & favColor: String} = [ {name="Bob" , age=12, favColor="blue" } @@ -14,102 +14,106 @@ def studentsMissing : (Fin 3)=>{name: String & age:(Maybe Int) & favColor:(Maybe , {name="Eve" , age=Just 13, favColor=Nothing } ] -'## TODO: employees - --- | Last Name | Department ID | --- | ------------ | ------------- | --- | "Rafferty" | 31 | --- | "Jones" | 32 | --- | "Heisenberg" | 33 | --- | "Robinson" | 34 | --- | "Smith" | 34 | --- | "Williams" | | - -'## TODO: departments - --- | Department ID | Department Name | --- | ------------- | --------------- | --- | 31 | "Sales" | --- | 33 | "Engineering" | --- | 34 | "Clerical" | --- | 35 | "Marketing" | - -'## TODO: jellyAnon - --- | get acne | red | black | white | green | yellow | brown | orange | pink | purple | --- | -------- | ----- | ----- | ----- | ----- | ------ | ----- | ------ | ----- | ------ | --- | true | false | false | false | true | false | false | true | false | false | --- | true | false | true | false | true | true | false | false | false | false | --- | false | false | false | false | true | false | false | false | true | false | --- | false | false | false | false | false | true | false | false | false | false | --- | false | false | false | false | false | true | false | false | true | false | --- | true | false | true | false | false | false | false | true | 
true | false | --- | false | false | true | false | false | false | false | false | true | false | --- | true | false | false | false | false | false | true | true | false | false | --- | true | false | false | false | false | false | false | true | false | false | --- | false | true | false | false | false | true | true | false | true | false | - -'## TODO: jellyNamed - --- | name | get acne | red | black | white | green | yellow | brown | orange | pink | purple | --- | ---------- | -------- | ----- | ----- | ----- | ----- | ------ | ----- | ------ | ----- | ------ | --- | "Emily" | true | false | false | false | true | false | false | true | false | false | --- | "Jacob" | true | false | true | false | true | true | false | false | false | false | --- | "Emma" | false | false | false | false | true | false | false | false | true | false | --- | "Aidan" | false | false | false | false | false | true | false | false | false | false | --- | "Madison" | false | false | false | false | false | true | false | false | true | false | --- | "Ethan" | true | false | true | false | false | false | false | true | true | false | --- | "Hannah" | false | false | true | false | false | false | false | false | true | false | --- | "Matthew" | true | false | false | false | false | false | true | true | false | false | --- | "Hailey" | true | false | false | false | false | false | false | true | false | false | --- | "Nicholas" | false | true | false | false | false | true | true | false | true | false | - -'## TODO: gradebook +def employees : (Fin 6)=>{lastname: String & deptID: Maybe Int} = + [ {lastname="Rafferty" , deptID=Just 31} + , {lastname="Jones" , deptID=Just 32} + , {lastname="Heisenberg", deptID=Just 33} + , {lastname="Robinson" , deptID=Just 34} + , {lastname="Smith" , deptID=Just 34} + , {lastname="Williams" , deptID=Nothing} + ] --- | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | --- | ------- | --- | ----- | ----- | ------- | ----- | ----- | 
----- | --- | "Bob" | 12 | 8 | 9 | 77 | 7 | 9 | 87 | --- | "Alice" | 17 | 6 | 8 | 88 | 8 | 7 | 85 | --- | "Eve" | 13 | 7 | 9 | 84 | 8 | 8 | 77 | +def departments : (Fin 4)=>{deptname: String & deptID: Int} = + [ {deptname="Sales" , deptID=31} + , {deptname="Engineering", deptID=33} + , {deptname="Clerical" , deptID=34} + , {deptname="Marketing" , deptID=35} + ] -'## TODO: gradebookMissing +def jellyAnon : (Fin 10)=>{get_acne:Bool & red:Bool & black:Bool & white:Bool & green:Bool & yellow:Bool & brown:Bool & orange:Bool & pink:Bool & purple:Bool} = + [ {get_acne=True, red=False, black=False, white=False, green=True , yellow=False, brown=False, orange=True , pink=False, purple=False} + , {get_acne=True, red=False, black=True, white=False, green=True , yellow=True , brown=False, orange=False, pink=False, purple=False} + , {get_acne=False, red=False, black=False, white=False, green=True , yellow=False, brown=False, orange=False, pink=True , purple=False} + , {get_acne=False, red=False, black=False, white=False, green=False, yellow=True , brown=False, orange=False, pink=False, purple=False} + , {get_acne=False, red=False, black=False, white=False, green=False, yellow=True , brown=False, orange=False, pink=True , purple=False} + , {get_acne=True, red=False, black=True, white=False, green=False, yellow=False, brown=False, orange=True , pink=True , purple=False} + , {get_acne=False, red=False, black=True, white=False, green=False, yellow=False, brown=False, orange=False, pink=True , purple=False} + , {get_acne=True, red=False, black=False, white=False, green=False, yellow=False, brown=True , orange=True , pink=False, purple=False} + , {get_acne=True, red=False, black=False, white=False, green=False, yellow=False, brown=False, orange=True , pink=False, purple=False} + , {get_acne=False, red=True , black=False, white=False, green=False, yellow=True , brown=True , orange=False, pink=True , purple=False} + ] + +def jellyNamed : (Fin 10)=>{name:String & get_acne:Bool & red:Bool 
& black:Bool & white:Bool & green:Bool & yellow:Bool & brown:Bool & orange:Bool & pink:Bool & purple:Bool} = + [ {name="Emily", get_acne=True, red=False, black=False, white=False, green=True , yellow=False, brown=False, orange=True , pink=False, purple=False} + , {name="Jacob", get_acne=True, red=False, black=True, white=False, green=True , yellow=True , brown=False, orange=False, pink=False, purple=False} + , {name="Emma", get_acne=False, red=False, black=False, white=False, green=True , yellow=False, brown=False, orange=False, pink=True , purple=False} + , {name="Aidan", get_acne=False, red=False, black=False, white=False, green=False, yellow=True , brown=False, orange=False, pink=False, purple=False} + , {name="Madison", get_acne=False, red=False, black=False, white=False, green=False, yellow=True , brown=False, orange=False, pink=True , purple=False} + , {name="Ethan", get_acne=True, red=False, black=True, white=False, green=False, yellow=False, brown=False, orange=True , pink=True , purple=False} + , {name="Hannah", get_acne=False, red=False, black=True, white=False, green=False, yellow=False, brown=False, orange=False, pink=True , purple=False} + , {name="Matthew", get_acne=True, red=False, black=False, white=False, green=False, yellow=False, brown=True , orange=True , pink=False, purple=False} + , {name="Hailey", get_acne=True, red=False, black=False, white=False, green=False, yellow=False, brown=False, orange=True , pink=False, purple=False} + , {name="Nicholas", get_acne=False, red=True , black=False, white=False, green=False, yellow=True , brown=True , orange=False, pink=True , purple=False} + ] + +def gradebook : (Fin 3)=>{name:String & age:Int & quiz1:Int & quiz2:Int & midterm:Int & quiz3:Int & quiz4:Int & final:Int} = + [ {name="Bob" , age=12, quiz1=8, quiz2=9, midterm=77, quiz3=7, quiz4=9, final=87} + , {name="Alice", age=17, quiz1=6, quiz2=8, midterm=88, quiz3=8, quiz4=7, final=85} + , {name="Eve" , age=13, quiz1=7, quiz2=9, midterm=84, quiz3=8, 
quiz4=8, final=77} + ] + +def gradebookMissing : (Fin 3)=>{name:String & age:Maybe Int & quiz1:Maybe Int & quiz2:Maybe Int & midterm:Maybe Int & quiz3:Maybe Int & quiz4:Maybe Int & final:Maybe Int} = + [ {name="Bob" , age=Just 12, quiz1=Just 8, quiz2=Just 9, midterm=Just 77, quiz3=Just 7, quiz4=Just 9, final=Just 87} + , {name="Alice", age=Just 17, quiz1=Just 6, quiz2=Just 8, midterm=Just 88, quiz3=Nothing, quiz4=Just 7, final=Just 85} + , {name="Eve" , age=Just 13, quiz1=Nothing, quiz2=Just 9, midterm=Just 84, quiz3=Just 8, quiz4=Just 8, final=Just 77} + ] + +def gradebookSeq : (Fin 3)=>{name:String & age:Int & quizzes:(Fin 4=>Int) & midterm:Int & final:Int} = + [ {name="Bob" , age=12, quizzes=[8, 9, 7, 9], midterm=77, final=87} + , {name="Alice", age=17, quizzes=[6, 8, 8, 7], midterm=88, final=85} + , {name="Eve" , age=13, quizzes=[7, 9, 8, 8], midterm=84, final=77} + ] + +def gradebookTable : (Fin 3)=>{name:String & age:Int & quizzes:(Fin 4=>{quiznum:Int & grade:Int}) & midterm:Int & final:Int} = + [ {name="Bob" , age=12, quizzes=[ {quiznum=1, grade=8} + , {quiznum=2, grade=9} + , {quiznum=3, grade=7} + , {quiznum=4, grade=9}], midterm=77, final=87} + , {name="Alice", age=17, quizzes=[ {quiznum=1, grade=6} + , {quiznum=2, grade=8} + , {quiznum=3, grade=8} + , {quiznum=4, grade=7}], midterm=88, final=85} + , {name="Eve" , age=13, quizzes=[ {quiznum=1, grade=7} + , {quiznum=2, grade=9} + , {quiznum=3, grade=8} + , {quiznum=4, grade=8}], midterm=84, final=77} + ] + + +'# Experiment: Indexing by sets + +def get_column {n f a} (c:Label) (t:n=>{@c:a & ...f}) : n=>a = + for i. + {@c=val, ...r} = t.i + val + +student_names = get_column ##name students +studentIx = StringSetIx $ toSet student_names + +students_ix_by_names = for i:studentIx. 
+ {name=_, ...r} = students.((ordinal i)@_) + r + +-- Note: It'd be nice to wrap this logic in a helper function like this: +-- def reindex_table_by_column {n f a} (c:Label) (t:n=>{@c:String & ...f}) : +-- (StringSetIx set)=>{&...f} = +-- ix_vals = get_column c t +-- newIx : Type = StringSetIx $ toSet ix_vals +-- for i:newIx. +-- {@c=_, ...r} = t.((ordinal i)@_) +-- r +-- But that would require delayed resolution of "set". --- | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | --- | ------- | --- | ----- | ----- | ------- | ----- | ----- | ----- | --- | "Bob" | 12 | 8 | 9 | 77 | 7 | 9 | 87 | --- | "Alice" | 17 | 6 | 8 | 88 | | 7 | 85 | --- | "Eve" | 13 | | 9 | 84 | 8 | 8 | 77 | - -'## TODO: gradebookSeq - --- | name | age | quizzes | midterm | final | --- | ------- | --- | ------------ | ------- | ----- | --- | "Bob" | 12 | [8, 9, 7, 9] | 77 | 87 | --- | "Alice" | 17 | [6, 8, 8, 7] | 88 | 85 | --- | "Eve" | 13 | [7, 9, 8, 8] | 84 | 77 | - -'## TODO: gradebookTable - --- | name | age | quizzes | midterm | final | --- | ------- | --- | ----------------- | ------- | ----- | --- | "Bob" | 12 | | quiz# | grade | | 77 | 87 | --- | | | | ----- | ----- | | | | --- | | | | 1 | 8 | | | | --- | | | | 2 | 9 | | | | --- | | | | 3 | 7 | | | | --- | | | | 4 | 9 | | | | --- | "Alice" | 17 | | quiz# | grade | | 88 | 85 | --- | | | | ----- | ----- | | | | --- | | | | 1 | 6 | | | | --- | | | | 2 | 8 | | | | --- | | | | 3 | 8 | | | | --- | | | | 4 | 7 | | | | --- | "Eve" | 13 | | quiz# | grade | | 84 | 77 | --- | | | | ----- | ----- | | | | --- | | | | 1 | 7 | | | | --- | | | | 2 | 9 | | | | --- | | | | 3 | 8 | | | | --- | | | | 4 | 8 | | | | '# Table API @@ -123,138 +127,74 @@ def addRows {n m a} (t:n=>a) (t':m=>a) : (n|m)=>a = Left ni -> t.ni Right mi -> t'.mi --- > addRows( --- students, --- [ --- [row: --- ("name", "Colton"), ("age", 19), --- ("favorite color", "blue")] --- ]) --- | name | age | favorite color | --- | -------- | --- | -------------- | --- | "Bob" | 
12 | "blue" | --- | "Alice" | 17 | "green" | --- | "Eve" | 13 | "red" | --- | "Colton" | 19 | "blue" | --- > addRows(gradebook, []) --- | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | --- | ------- | --- | ----- | ----- | ------- | ----- | ----- | ----- | --- | "Bob" | 12 | 8 | 9 | 77 | 7 | 9 | 87 | --- | "Alice" | 17 | 6 | 8 | 88 | 8 | 7 | 85 | --- | "Eve" | 13 | 7 | 9 | 84 | 8 | 8 | 77 | +:p addRows students [{name="Colton", age=19, favColor="blue"}] + +:p addRows gradebook [] -- ### `addColumn :: t1:Table * c:ColName * vs:Seq -> t2:Table` def addColumn {n f a} (t:n=>{&...f}) (c:Label) (vs:n=>a) : n=>{@c:a & ...f} = for i. {@c=vs.i, ...t.i} --- > hairColor = ["brown", "red", "blonde"] --- > addColumn(students, "hair-color", hairColor) --- | name | age | favorite color | hair-color | --- | ------- | --- | -------------- | ---------- | --- | "Bob" | 12 | "blue" | "brown" | --- | "Alice" | 17 | "green" | "red" | --- | "Eve" | 13 | "red" | "blonde" | --- > presentation = [9, 9, 6] --- > addColumn(gradebook, "presentation", presentation) --- | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | presentation | --- | ------- | --- | ----- | ----- | ------- | ----- | ----- | ----- | ------------ | --- | "Bob" | 12 | 8 | 9 | 77 | 7 | 9 | 87 | 9 | --- | "Alice" | 17 | 6 | 8 | 88 | 8 | 7 | 85 | 9 | --- | "Eve" | 13 | 7 | 9 | 84 | 8 | 8 | 77 | 6 | +hairColor = ["brown", "red", "blonde"] +:p addColumn students ##haircolor hairColor + +presentation = [9, 9, 6] +addColumn gradebook ##presentation presentation + -- ### `buildColumn :: t1:Table * c:ColName * f:(r:Row -> v:Value) -> t2:Table` def buildColumn {n f a} (t:n=>{&...f}) (c:Label) (fn: {&...f} -> a) : n=>{@c:a & ...f} = for i. 
{@c=fn t.i, ...t.i} --- > isTeenagerBuilder = --- function(r): --- 12 < getValue(r, "age") and getValue(r, "age") < 20 --- end --- > buildColumn(students, "is-teenager", isTeenagerBuilder) --- | name | age | favorite color | is-teenager | --- | ------- | --- | -------------- | ----------- | --- | "Bob" | 12 | "blue" | false | --- | "Alice" | 17 | "green" | true | --- | "Eve" | 13 | "red" | true | --- > didWellInFinal = --- function(r): --- 85 <= getValue(r, "final") --- end --- > buildColumn(gradebook, "did-well-in-final", didWellInFinal) --- | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | did-well-in-final | --- | ------- | --- | ----- | ----- | ------- | ----- | ----- | ----- | ----------------- | --- | "Bob" | 12 | 8 | 9 | 77 | 7 | 9 | 87 | true | --- | "Alice" | 17 | 6 | 8 | 88 | 8 | 7 | 85 | true | --- | "Eve" | 13 | 7 | 9 | 84 | 8 | 8 | 77 | false | +:p buildColumn students ##is_teenager \{age, ..._}. + (12 < age ) && (age < 20) -- Very terse syntax! + +:p buildColumn gradebook ##did_well_in_final \{final, ..._}. 
+ 85 <= final + -- ### `vcat :: t1:Table * t2:Table -> t3:Table` def vcat {n m a} (t:n=>a) (t':m=>a) : (n|m)=>a = addRows t t' --- > increaseAge = --- function(r): --- [row: ("age", 1 + getValue(r, "age"))] --- end --- > vcat(students, update(students, increaseAge)) --- | name | age | favorite color | --- | ------- | --- | -------------- | --- | "Bob" | 12 | "blue" | --- | "Alice" | 17 | "green" | --- | "Eve" | 13 | "red" | --- | "Bob" | 13 | "blue" | --- | "Alice" | 18 | "green" | --- | "Eve" | 14 | "red" | --- > curveMidtermAndFinal = --- function(r): --- curve = --- function(n): --- n + 5 --- end --- [row: --- ("midterm", curve(getValue("midterm"))), --- ("final", curve(getValue("final")))] --- end --- > vcat(gradebook, update(gradebook, curveMidtermAndFinal)) --- | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | --- | ------- | --- | ----- | ----- | ------- | ----- | ----- | ----- | --- | "Bob" | 12 | 8 | 9 | 77 | 7 | 9 | 87 | --- | "Alice" | 17 | 6 | 8 | 88 | 8 | 7 | 85 | --- | "Eve" | 13 | 7 | 9 | 84 | 8 | 8 | 77 | --- | "Bob" | 12 | 8 | 9 | 82 | 7 | 9 | 92 | --- | "Alice" | 17 | 6 | 8 | 93 | 8 | 7 | 90 | --- | "Eve" | 13 | 7 | 9 | 89 | 8 | 8 | 82 | +increasedAge = for i. + {age, ...other} = students.i + {age=age + 1, ...other} +:p vcat students increasedAge + + +curve = \grade. grade + 5 +curvedMidtermAndFinal = for i. + {midterm, final, ...other} = gradebook.i + {midterm=curve(midterm), final=curve(final), ...other} +:p vcat gradebook curvedMidtermAndFinal + + +-- moved up from below +-- ### `dropColumns :: t1:Table * cs:Seq -> t2:Table` +def dropColumns {n f'} (f: Fields) (t:n=>{...f & ...f'}) : n=>{&...f'} = + for i. + {@...f=_, ...r} = t.i + r + +:p dropColumns {age: _ ? } students +:p dropColumns {final: _ ? midterm: _} gradebook + + def hcat {n f f'} (t:n=>{&...f}) (t':n=>{&...f'}) : n=>{...f & ...f'} = for i. 
{...(t.i), ...(t'.i)} --- > hcat(students, dropColumns(gradebook, ["name", "age"])) --- | name | age | favorite color | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | --- | ------- | --- | -------------- | ----- | ----- | ------- | ----- | ----- | ----- | --- | "Bob" | 12 | "blue" | 8 | 9 | 77 | 7 | 9 | 87 | --- | "Alice" | 17 | "green" | 6 | 8 | 88 | 8 | 7 | 85 | --- | "Eve" | 13 | "red" | 7 | 9 | 84 | 8 | 8 | 77 | --- > hcat(dropColumns(students, ["name", "age"]), gradebook) --- | favorite color | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | --- | -------------- | ------- | --- | ----- | ----- | ------- | ----- | ----- | ----- | --- | "blue" | "Bob" | 12 | 8 | 9 | 77 | 7 | 9 | 87 | --- | "green" | "Alice" | 17 | 6 | 8 | 88 | 8 | 7 | 85 | --- | "red" | "Eve" | 13 | 7 | 9 | 84 | 8 | 8 | 77 | +:p hcat students $ dropColumns {name: _ ? age: _} gradebook +:p hcat (dropColumns {name: _ ? age: _} students) gradebook + def values {n a} : n=>a -> n=>a = id --- > values([ --- [row: ("name", "Alice")], --- [row: ("name", "Bob")]]) --- | name | --- | ------- | --- | "Alice" | --- | "Bob" | --- > values([ --- [row: ("name", "Alice"), ("age", 12)], --- [row: ("name", "Bob"), ("age", 13)]]) --- | name | age | --- | ------- | --- | --- | "Alice" | 12 | --- | "Bob" | 13 | + +:p values [{name="Alice"}, {name="Bob"}] +:p values [{name="Alice", age=12}, + {name="Bob", age=13}] + -- ### `crossJoin :: t1:Table * t2:Table -> t3:Table` def crossJoin {n m f f'} (t:n=>{&...f}) (t':m=>{&...f'}) : (n & m)=>{...f & ...f'} = @@ -264,17 +204,17 @@ def crossJoin {n m f f'} (t:n=>{&...f}) (t':m=>{&...f'}) : (n & m)=>{...f & ...f -- > petiteJelly -- | get acne | red | black | -- | -------- | ----- | ----- | --- | true | false | false | --- | true | false | true | +-- | =True | =False | =False | +-- | =True | =False | =True | -- > crossJoin(students, petiteJelly) -- | name | age | favorite color | get acne | red | black | -- | ------- | --- | -------------- | -------- | ----- 
| ----- | --- | "Bob" | 12 | "blue" | true | false | false | --- | "Bob" | 12 | "blue" | true | false | true | --- | "Alice" | 17 | "green" | true | false | false | --- | "Alice" | 17 | "green" | true | false | true | --- | "Eve" | 13 | "red" | true | false | false | --- | "Eve" | 13 | "red" | true | false | true | +-- | "Bob" | 12 | "blue" | =True | =False | =False | +-- | "Bob" | 12 | "blue" | =True | =False | =True | +-- | "Alice" | 17 | "green" | =True | =False | =False | +-- | "Alice" | 17 | "green" | =True | =False | =True | +-- | "Eve" | 13 | "red" | =True | =False | =False | +-- | "Eve" | 13 | "red" | =True | =False | =True | -- > crossJoin(emptyTable, petiteJelly) -- | get acne | red | black | -- | -------- | ----- | ----- | @@ -356,34 +296,20 @@ def getColumn {f n a} (c:Label) (t:n=>{@c:a & ...f}) : n=>a = todo '## Subtable def selectRows {n m a} (t:n=>a) (is:m=>n) : m=>a = for i:m. t.(is.i) --- > selectRows(students, [2, 0, 2, 1]) --- | name | age | favorite color | --- | ------- | --- | -------------- | --- | "Eve" | 13 | "red" | --- | "Bob" | 12 | "blue" | --- | "Eve" | 13 | "red" | --- | "Alice" | 17 | "green" | --- > selectRows(gradebooks, [2, 1]) --- | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | --- | ------- | --- | ----- | ----- | ------- | ----- | ----- | ----- | --- | "Eve" | 13 | 7 | 9 | 84 | 8 | 8 | 77 | --- | "Alice" | 17 | 6 | 8 | 88 | 8 | 7 | 85 | --- ``` -def selectRowsMask (t:n=>a) (shouldTake:n=>Bool) : List a = +:p selectRows students [2@_, 0@_, 2@_, 1@_] +:p selectRows gradebook [2@_, 1@_] + + +def selectRowsMask {n a} (t:n=>a) (shouldTake:n=>Bool) : List a = concat $ for i. 
case shouldTake.i of True -> AsList _ [t.i] False -> mempty --- > selectRows(students, [true, false, true]) --- | name | age | favorite color | --- | ----- | --- | -------------- | --- | "Bob" | 12 | "blue" | --- | "Eve" | 13 | "red" | --- > selectRows(gradebook, [false, false, true]) --- | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | --- | ----- | --- | ----- | ----- | ------- | ----- | ----- | ----- | --- | "Eve" | 13 | 7 | 9 | 84 | 8 | 8 | 77 | + +:p selectRowsMask students [True, False, True] +:p selectRowsMask gradebook [False, False, True] + -- INEXPRESSIBLE: Columns are unordered -- ### (overload 1/3) `selectColumns :: t1:Table * bs:Seq -> t2:Table` @@ -396,29 +322,26 @@ def selectColumns {n f'} (f: Fields) (t: n=>{...f & ...f'}) : n=>{&...f} = for i. {@...f=v, ...} = t.i v --- > selectColumns(students, ["favorite color", "age"]) --- | favorite color | age | --- | -------------- | --- | --- | "blue" | 12 | --- | "green" | 17 | --- | "red" | 13 | --- > selectColumns(gradebook, ["final", "name", "midterm"]) --- | final | name | midterm | --- | ----- | ------- | ------- | --- | 87 | "Bob" | 77 | --- | 85 | "Alice" | 88 | --- | 77 | "Eve" | 84 | - -def head = todo --- > head(students, 1) --- | name | age | favorite color | --- | ------- | --- | -------------- | --- | "Bob" | 12 | "blue" | --- > head(students, -2) + +:p selectColumns {favColor:_ ? age:_} students +:p selectColumns {final:_ ? name:_ ? midterm:_} gradebook + + +def head' {n a} (xs:n=>a) (num:Int) : Maybe ((Fin num)=>a) = + s = size n + case s < num of + True -> Nothing + False -> Just for i:(Fin num). 
xs.(unsafeFromOrdinal n (ordinal i)) + +:p head' students 1 + +-- TODO: allow negative arguments to `head'` +-- :p head' students -2 -- | name | age | favorite color | -- | ------- | --- | -------------- | -- | "Bob" | 12 | "blue" | + '### TODO: `distinct` (type-classes for records) def distinct = todo @@ -439,41 +362,19 @@ def dropColumn {n f a} (c:Label) (t:n=>{@c:a & ...f}) : n=>{&...f} = {@c=_, ...r} = t.i r --- > dropColumn(students, "age") --- | name | favorite color | --- | ------- | -------------- | --- | "Bob" | "blue" | --- | "Alice" | "green" | --- | "Eve" | "red" | --- > dropColumn(gradebook, "final") --- | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | --- | ------- | --- | ----- | ----- | ------- | ----- | ----- | --- | "Bob" | 12 | 8 | 9 | 77 | 7 | 9 | --- | "Alice" | 17 | 6 | 8 | 88 | 8 | 7 | --- | "Eve" | 13 | 7 | 9 | 84 | 8 | 8 | +:p dropColumn ##age students +:p dropColumn ##final gradebook -def dropColumns {n f'} (f: Fields) (t:n=>{...f & ...f'}) : n=>{&...f'} = - for i. - {@...f=_, ...r} = t.i - r --- ### `dropColumns :: t1:Table * cs:Seq -> t2:Table` --- --- > dropColumns(students, ["age"]) --- | name | favorite color | --- | ------- | -------------- | --- | "Bob" | "blue" | --- | "Alice" | "green" | --- | "Eve" | "red" | --- > dropColumns(gradebook, ["final", "midterm"]) --- | name | age | quiz1 | quiz2 | quiz3 | quiz4 | --- | ------- | --- | ----- | ----- | ----- | ----- | --- | "Bob" | 12 | 8 | 9 | 7 | 9 | --- | "Alice" | 17 | 6 | 8 | 8 | 7 | --- | "Eve" | 13 | 7 | 9 | 8 | 8 | - -def tfilter (t:n=>a) (keep:a -> Bool) : List a = - selectRowsMask t $ for i. keep t.i +-- dropColumns used to go here, but it was needed above for the `hcat` demo. + + +def tfilter {n a} (t:n=>a) (keep:a -> Bool) : List a = + selectRowsMask t $ for i. keep t.i + +:p tfilter students \{age, ..._}. 
+ age < 15 + -- > ageUnderFifteen = -- function(r): -- getValue(r, "age") < 15 @@ -492,9 +393,8 @@ def tfilter (t:n=>a) (keep:a -> Bool) : List a = -- | ------- | --- | ----- | ----- | ------- | ----- | ----- | ----- | -- | "Alice" | 17 | 6 | 8 | 88 | 8 | 7 | 85 | -'## Ordering -import sort +'## Ordering -- TODO: We could the sort on the full data, with an Ord instance that only looks at the column -- ### `tsort :: t1:Table * c:ColName * b:Boolean -> t2:Table` @@ -563,6 +463,7 @@ def orderBy = todo -- | "Eve" | 13 | 7 | 9 | 84 | 8 | 8 | 77 | -- | "Bob" | 12 | 8 | 9 | 77 | 7 | 9 | 87 | + '## Aggregate '### TODO: `count` (groupBy) @@ -601,6 +502,7 @@ def bin = todo -- | "80 <= age < 85" | 0 | -- | "85 <= age < 90" | 2 | + '### TODO: `pivotTable` (groupBy) -- ### `pivotTable :: t1:Table * cs:Seq * aggs:Seq -> t2:Table` @@ -633,6 +535,7 @@ def pivotTable (c:Label) (t:n=>{@c:a & ...f}) (agg: List {&...f} -> {&...f'}) : -- | true | false | 0 | 1/4 | -- | true | true | 0 | 0 | + '### TODO: `groupBy` (groupBy) -- TODO: Write this out to flatten out key and row' as records in the output @@ -685,6 +588,7 @@ def groupBy (t:n=>row) -- | "kid" | 87 | -- | "teenager" | 81 | + '## Missing values '### TODO: `completeCases` (type-classes over records) @@ -730,6 +634,7 @@ def fillna {n f a} (c:Label) (t:n=>{@c:(Maybe a) & ...f}) (v:a) : n=>{@c:a & ... -- | "Alice" | 17 | 6 | 8 | 88 | | 7 | 85 | -- | "Eve" | 13 | 0 | 9 | 84 | 8 | 8 | 77 | + '## Data Cleaning '### TODO: `pivotLonger` (??) @@ -1022,8 +927,10 @@ def groupBySubtractive {n f a} (c:Label) (t:n=>{@c:a & ...f}) : List {key: a & g -- | "Alice" | 17 | 6 | 8 | true | 8 | 7 | true | -- | "Eve" | 13 | 7 | 9 | false | 8 | 8 | false | -def select (t:n=>a) (f:a -> n -> b) : n=>b = for i. f t.i i +def select' {n a b} (t:n=>a) (f:a -> n -> b) : n=>b = for i. f t.i i +:p select' students \{age, favColor, ..._} n. 
+ {ID=n, COLOR=favColor, age} -- > select( -- students, -- function(r, n): @@ -1037,6 +944,9 @@ def select (t:n=>a) (f:a -> n -> b) : n=>b = for i. f t.i i -- | 0 | "blue" | 12 | -- | 1 | "green" | 17 | -- | 2 | "red" | 13 | + +:p select' gradebook \{name, midterm, final, ..._} n. + {full_name= concat [name, " Smith"], midterm_and_final_avg=(IToF (midterm + final)) / 2.0} -- > select( -- gradebook, -- function(r, n): From ac79542b54e4eba6519c55865dcd4843735e2f75 Mon Sep 17 00:00:00 2001 From: David Duvenaud Date: Sat, 5 Mar 2022 22:13:01 -0500 Subject: [PATCH 2/4] Added leftJoin and count to dataframes example. --- examples/data-frames.dx | 179 +++++++++++++++++++++++++++++++++++----- 1 file changed, 160 insertions(+), 19 deletions(-) diff --git a/examples/data-frames.dx b/examples/data-frames.dx index e495a88d3..d3d2e17d7 100644 --- a/examples/data-frames.dx +++ b/examples/data-frames.dx @@ -1,4 +1,5 @@ import set +import sort '# Example tables @@ -200,12 +201,22 @@ def values {n a} : n=>a -> n=>a = id def crossJoin {n m f f'} (t:n=>{&...f}) (t':m=>{&...f'}) : (n & m)=>{...f & ...f'} = for (i, j). {...(t.i), ...(t'.j)} +-- Subtable is used in this example, but its definition appears to be missing. +def subTable {n m a f'} (rows:m=>n) (f: Fields) (t: n=>{...f & ...f'}) : m=>{&...f} = + for i:m. + {@...f=v, ...} = t.(rows.i) + v + -- > petiteJelly = subTable(jellyAnon, [0, 1], [0, 1, 2]) -- > petiteJelly -- | get acne | red | black | -- | -------- | ----- | ----- | -- | =True | =False | =False | -- | =True | =False | =True | + + +-- :p crossJoin students petiteJelly + -- > crossJoin(students, petiteJelly) -- | name | age | favorite color | get acne | red | black | -- | ------- | --- | -------------- | -------- | ----- | ----- | @@ -224,15 +235,62 @@ def crossJoin {n m f f'} (t:n=>{&...f}) (t':m=>{&...f'}) : (n & m)=>{...f & ...f -- It should be sufficient to do the join on a single column. 
One can always -- restructure the table so that the join happens on a tuple of values from -- the flattened columns. -def leftJoin = todo --- def leftJoin (c:Label) (l:n=>{@c: a & ...f}) (r:m=>{@c: a & ...f'}) --- : n=>{extra: Maybe {...f'} & ...f} = todo + +def findInUnordered {a n} [Eq a] (xs:n=>a) (v:a) : Maybe n = + -- Wasteful, but at least the parallelism is exposed. + (AsList num_found found_table) = argFilter (\x. x == v) xs + case num_found == 0 of + True -> Nothing + False -> Just found_table.(unsafeFromOrdinal _ 0) + +-- The implementation below takes (n * m) time, but could be done in O(n log m) +-- if table r was indexed by a set of values of type a + +def leftJoin {n m a f f'} [Eq a] + (c:Label) + (left: n=>{@c: a & ...f}) + (right:m=>{@c: a & ...f'}) + : n=>{@c: a & extra: Maybe { &...f'} & ...f} = + right_c_vals = get_column c right + for i:n. + {@c=left_c_val, ...rest_left} = left.i + newdata = case findInUnordered right_c_vals left_c_val of + Nothing -> Nothing + Just j -> + {@c=_, ...rest_right} = right.j + Just rest_right + {@c=left_c_val, extra=newdata, ...rest_left} + +:p leftJoin ##name students gradebook + +-- TODO: Match the example below exactly once multiple fields are supported. +--:p leftJoin students gradebook {name:_ ? age:_} + + -- > leftJoin(students, gradebook, ["name", "age"]) -- | name | age | favorite color | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | -- | ------- | --- | -------------- | ----- | ----- | ------- | ----- | ----- | ----- | -- | "Bob" | 12 | "blue" | 8 | 9 | 77 | 7 | 9 | 87 | -- | "Alice" | 17 | "green" | 6 | 8 | 88 | 8 | 7 | 85 | -- | "Eve" | 13 | "red" | 7 | 9 | 84 | 8 | 8 | 77 | + +-- The original example is ill-formed, because employees has deptID as Maybe Int, +-- while departments has deptID as Int. So we need to fix the tables up a bit. +departments_maybe_deptID = for i. 
+ {deptID=d, ...rest} = departments.i + {deptID=Just d, ...rest} + +-- Todo: move to prelude +instance {a} [Eq a] Eq (Maybe a) + (==) = \maybe_x maybe_y. case maybe_x of + Nothing -> case maybe_y of + Nothing -> True + Just y -> False + Just x -> case maybe_y of + Just y -> x == y + Nothing -> False + +:p leftJoin ##deptID employees departments_maybe_deptID -- > leftJoin(employees, departments, ["Department ID"]) -- | Last Name | Department ID | Department Name | -- | ------------ | ------------- | --------------- | @@ -328,6 +386,7 @@ def selectColumns {n f'} (f: Fields) (t: n=>{...f & ...f'}) : n=>{&...f} = def head' {n a} (xs:n=>a) (num:Int) : Maybe ((Fin num)=>a) = + -- TODO: allow negative arguments to `head'` s = size n case s < num of True -> Nothing @@ -344,7 +403,15 @@ def head' {n a} (xs:n=>a) (num:Int) : Maybe ((Fin num)=>a) = '### TODO: `distinct` (type-classes for records) -def distinct = todo +-- TODO: Will work once we can make typeclass interfaces for arbitrary records. +-- Here we need to construct an Ord instance to do this efficiently, and Eq to +-- do it at all. +def distinct {n a} [Ord a] (t:n=>a) : List a = + (UnsafeAsSet _ uniquetable) = toSet t + AsList _ uniquetable + +:p distinct students + -- > distinct(students) -- | name | age | favorite color | -- | ------- | --- | -------------- | @@ -384,6 +451,10 @@ def tfilter {n a} (t:n=>a) (keep:a -> Bool) : List a = -- | ----- | --- | -------------- | -- | "Bob" | 12 | "blue" | -- | "Eve" | 13 | "red" | + +:p tfilter students \{name, ..._}. + (listLength name) > 3 + -- > nameLongerThan3Letters = -- function(r): -- length(getValue(r, "name")) > 3 @@ -466,17 +537,59 @@ def orderBy = todo '## Aggregate -'### TODO: `count` (groupBy) - -def count = todo --- Takes a `Table` and a `ColName` representing the name of a column in that `Table`. Produces a `Table` that summarizes how many rows have each value in the given column. 
--- +'### `count` (groupBy) +Takes a `Table` and a `ColName` representing the name of a column in that `Table`. +Produces a `Table` that summarizes how many rows have each value in the given column. + +'#### Some setup to get efficient counting of duplicates. +This is mostly copied + modified from `set.dx`, could potentially be unified. + +def unsafeAllExceptFirst {n a} (xs:n=>a) : List a = + shortSize = Fin (max 0 ((size n) - 1)) + allButFirst = for i:shortSize. xs.(unsafeFromOrdinal _ ((ordinal i) + 1)) + (AsList _ allButFirst) + +def mergeUniqueSortedListsWithCounts {a} [Eq a] (xlist:List (a & Int)) + (ylist:List (a & Int)) : List (a & Int) = + -- This function is associative, for use in a monoidal reduction. + -- Assumes all xs are <= all ys. + -- The element at the end of xs might equal the + -- element at the beginning of ys. If so, this + -- function removes the duplicate when concatenating the lists. + (AsList nx xs) = xlist + (AsList _ ys) = ylist + case last xs of + Nothing -> ylist + Just (last_x, x_count) -> case first ys of + Nothing -> xlist + Just (first_y, y_count) -> case last_x == first_y of + False -> concat [xlist, ylist] + True -> + concat [allExceptLast xs, + (AsList 1 [(last_x, x_count + y_count)]), + unsafeAllExceptFirst ys] + +def removeDuplicatesFromSortedWithCounts {n a} [Eq a] (xs:n=>a) : List (a & Int) = + xlists = for i:n. (AsList 1 ([(xs.i, 1)])) + reduce (AsList 0 []) mergeUniqueSortedListsWithCounts xlists + +def count {n a f} [Ord a] (c:Label) (t:n=>{@c:a & ...f}) : List {value:a & count:Int} = + c_vals = get_column c t + sorted_c_vals = sort c_vals + (AsList _ distinct_vals) = removeDuplicatesFromSortedWithCounts sorted_c_vals + AsList _ for i. 
+ (value, count) = distinct_vals.i + {value, count} + +:p count ##favColor students -- > count(students, "favorite color") -- | value | count | -- | ------- | ----- | -- | "blue" | 1 | -- | "green" | 1 | -- | "red" | 1 | + +:p count ##age gradebook -- > count(gradebook, "age") -- | value | count | -- | ----- | ----- | @@ -484,17 +597,35 @@ def count = todo -- | 17 | 1 | -- | 13 | 1 | -'### TODO: `bin` (groupBy) + +'### `bin` (groupBy) -- ### `bin :: t1:Table * c:ColName * n:Number -> t2:Table` -def bin = todo --- Groups the values of a numeric column into bins. The parameter `n` specifies the bin width. This function is useful in creating histograms and converting continuous random variables to categorical ones. --- +-- Groups the values of a numeric column into bins. +-- The parameter `n` specifies the bin width. +-- This function is useful in creating histograms and converting continuous +-- random variables to categorical ones. + +def bin {m f} (c:Label) (t:m=>{@c:Int & ...f}) (n:Int) + : List {lower:Int & upper:Int & count:Int} = + -- Can only handle positive integers. + c_vals = get_column c t + binned = for i. rem c_vals.i n + (AsList _ distinct_vals) = removeDuplicatesFromSortedWithCounts binned + AsList _ for i. + (value, count) = distinct_vals.i + {lower=value * n, upper=(value + 1) * n - 1, count} + +:p bin ##age students 5 + -- > bin(students, "age", 5) -- | group | count | -- | ---------------- | ----- | -- | "10 <= age < 15" | 2 | -- | "15 <= age < 20" | 1 | + +:p bin ##final gradebook 5 + -- > bin(gradebook, "final", 5) -- | group | count | -- | ---------------- | ----- | @@ -504,11 +635,17 @@ def bin = todo '### TODO: `pivotTable` (groupBy) +Partitions rows into groups and summarize each group with the functions in `agg`. +Each element of `agg` specifies the output column, the input column, +and the function that compute the summarizing value (e.g. average, sum, and count). +-- Inexpressible? 
Seems like we can't match description exactly without allowing +-- some sort of type-level "unzip" for the output fields. -- ### `pivotTable :: t1:Table * cs:Seq * aggs:Seq -> t2:Table` -def pivotTable (c:Label) (t:n=>{@c:a & ...f}) (agg: List {&...f} -> {&...f'}) : n=>{@c:a & ...f'} = todo --- Partitions rows into groups and summarize each group with the functions in `agg`. Each element of `agg` specifies the output column, the input column, and the function that compute the summarizing value (e.g. average, sum, and count). --- +def pivotTable {n a f f'} (c:Label) (t:n=>{@c:a & ...f}) + (agg: List ({&...f} -> {&...f'})) : n=>{@c:a & ...f'} = + todo + -- ```lua -- > pivotTable(students, ["favorite color"], [("age-average", "age", average)]) -- | favorite color | age-average | @@ -537,15 +674,19 @@ def pivotTable (c:Label) (t:n=>{@c:a & ...f}) (agg: List {&...f} -> {&...f'}) : '### TODO: `groupBy` (groupBy) +Groups the rows of a table according to a specified key selector function and +creates a result value from each group and its key. +The rows of each group are projected by using a specified function. -- TODO: Write this out to flatten out key and row' as records in the output -def groupBy (t:n=>row) +def groupBy {n row row' key value} + (t:n=>row) (getKey:row -> key) (project:row -> value) (aggregate:key -> List value -> row') - : List (key & row') = todo + : List (key & row') = + todo --- Groups the rows of a table according to a specified key selector function and creates a result value from each group and its key. The rows of each group are projected by using a specified function. -- -- > colorTemp = -- function(r): From e5b564f1d3c2a1a92fb1437287204d2ab5d0f4f9 Mon Sep 17 00:00:00 2001 From: David Duvenaud Date: Sun, 6 Mar 2022 12:33:38 -0500 Subject: [PATCH 3/4] Added argsort with tests. 
--- lib/sort.dx | 23 +++++++++++++++++++++++ tests/sort-tests.dx | 13 +++++++++++++ 2 files changed, 36 insertions(+) diff --git a/lib/sort.dx b/lib/sort.dx index 5c50394ac..0ce813bb8 100644 --- a/lib/sort.dx +++ b/lib/sort.dx @@ -59,9 +59,32 @@ def sort {a n} [Ord a] (xs: n=>a) : n=>a = (AsList _ r) = reduce mempty mcombine xlists unsafeCastTable n r + def (+|) {n} [Ix n] (i:n) (delta:Int) : n = i' = ordinal i + delta fromOrdinal _ $ select (i' >= size n) (size n - 1) i' def isSorted {a n} [Ord a] (xs:n=>a) : Bool = all for i. xs.i <= xs.(i +| 1) + + +'#### Argsort + +-- Wrapper datatype so we can sort on only first part. +data Ignore_snd a b [Ord a] = + MkIgnore_snd orderme:a aux:b + +instance {a b} [Eq a, Eq b] Eq (Ignore_snd a b) + (==) = \(MkIgnore_snd x1 y1) (MkIgnore_snd x2 y2). + (x1 == x2) && (y1 == y2) + +instance {a b} [Eq a, Eq b] Ord (Ignore_snd a b) + (<) = \(MkIgnore_snd x1 _) (MkIgnore_snd x2 _). (x1 < x2) + (>) = \(MkIgnore_snd x1 _) (MkIgnore_snd x2 _). (x1 > x2) + +def argsort {a n} [Eq a, Eq n, Ord a] (xs: n=>a) : n=>n = + ix_pairs = for i. MkIgnore_snd xs.i i + sorted = sort ix_pairs + for i. + (MkIgnore_snd _ ix) = sorted.i + ix diff --git a/tests/sort-tests.dx b/tests/sort-tests.dx index 69e49bda0..b4dce0549 100644 --- a/tests/sort-tests.dx +++ b/tests/sort-tests.dx @@ -49,3 +49,16 @@ import sort :p isSorted $ sort ["Charlie", "Alice", "Bob", "Aaron"] > True + + +'### Argsort Tests + +example = [5, 4, 3, 2, 1, 100, 1000, 10] + +-- Argsort gives a perumatation that sorts the array. +argsorted = argsort example +:p isSorted for i. example.(argsorted.i) + +-- Argsort of argsort gives a permutation that un-sorts the array. +rank = argsort $ argsort example +:p all for i. example.i == (sort example).(rank.i) From 46ed7217f6caf792b3650d417a8553bb20f1cfea Mon Sep 17 00:00:00 2001 From: David Duvenaud Date: Sun, 6 Mar 2022 21:54:46 -0500 Subject: [PATCH 4/4] Added groupBy and fixed argsort tests. 
--- examples/data-frames.dx | 96 +++++++++++++++++++++++++++++++++++------ tests/sort-tests.dx | 2 + 2 files changed, 85 insertions(+), 13 deletions(-) diff --git a/examples/data-frames.dx b/examples/data-frames.dx index d3d2e17d7..2e578611c 100644 --- a/examples/data-frames.dx +++ b/examples/data-frames.dx @@ -93,12 +93,12 @@ def gradebookTable : (Fin 3)=>{name:String & age:Int & quizzes:(Fin 4=>{quiznum: '# Experiment: Indexing by sets -def get_column {n f a} (c:Label) (t:n=>{@c:a & ...f}) : n=>a = +def getColumn {n f a} (c:Label) (t:n=>{@c:a & ...f}) : n=>a = for i. {@c=val, ...r} = t.i val -student_names = get_column ##name students +student_names = getColumn ##name students studentIx = StringSetIx $ toSet student_names students_ix_by_names = for i:studentIx. @@ -108,7 +108,7 @@ students_ix_by_names = for i:studentIx. -- Note: It'd be nice to wrap this logic in a helper function like this: -- def reindex_table_by_column {n f a} (c:Label) (t:n=>{@c:String & ...f}) : -- (StringSetIx set)=>{&...f} = --- ix_vals = get_column c t +-- ix_vals = getColumn c t -- newIx : Type = StringSetIx $ toSet ix_vals -- for i:newIx. -- {@c=_, ...r} = t.((ordinal i)@_) @@ -251,7 +251,7 @@ def leftJoin {n m a f f'} [Eq a] (left: n=>{@c: a & ...f}) (right:m=>{@c: a & ...f'}) : n=>{@c: a & extra: Maybe { &...f'} & ...f} = - right_c_vals = get_column c right + right_c_vals = getColumn c right for i:n. {@c=left_c_val, ...rest_left} = left.i newdata = case findInUnordered right_c_vals left_c_val of @@ -345,9 +345,15 @@ def getValue {f a} (c:Label) (r:{@c:a & ...f}) : a = -- ### (overloading 1/2) `getColumn :: t:Table * n:Number -> vs:Seq` -- ### (overloading 2/2) `getColumn :: t:Table * c:ColName -> vs:Seq` -def getColumn {f n a} (c:Label) (t:n=>{@c:a & ...f}) : n=>a = todo + +-- Defined above. 
+-- def getColumn {f n a} (c:Label) (t:n=>{@c:a & ...f}) : n=>a = todo + +:p getColumn ##age students -- > getColumn(students, "age") -- [12, 17, 13] + +:p getColumn ##name gradebook -- > getColumn(gradebook, "name") -- ["Bob", "Alice", "Eve"] @@ -467,18 +473,21 @@ def tfilter {n a} (t:n=>a) (keep:a -> Bool) : List a = '## Ordering --- TODO: We could the sort on the full data, with an Ord instance that only looks at the column -- ### `tsort :: t1:Table * c:ColName * b:Boolean -> t2:Table` -def tsort {n f a} [Ord a] (c:Label) (t:n=>{@c: a & ...f}) : n=>{@c: a & ...f} = +-- TODO Boolean argument +def tsort {n f a} [Eq n, Ord a] (c:Label) (t:n=>{@c: a & ...f}) : n=>{@c: a & ...f} = ixs = argsort $ getColumn c t for i. t.(ixs.i) +:p tsort ##age students -- > tsort(students, "age", true) -- | name | age | favorite color | -- | ------- | --- | -------------- | -- | "Bob" | 12 | "blue" | -- | "Eve" | 13 | "red" | -- | "Alice" | 17 | "green" | + +:p tsort ##final gradebook -- > tsort(gradebook, "final", false) -- | name | age | quiz1 | quiz2 | midterm | quiz3 | quiz4 | final | -- | ------- | --- | ----- | ----- | ------- | ----- | ----- | ----- | @@ -489,6 +498,8 @@ def tsort {n f a} [Ord a] (c:Label) (t:n=>{@c: a & ...f}) : n=>{@c: a & ...f} = '### TODO: `sortByColumns` (lists of labels) -- ### `sortByColumns :: t1:Table * cs:Seq -> t2:Table` +def sortByColumns {n f f' a} [Eq n, Ord a] (f: Fields) (t:n=>{...f & ...f'}) : n=>{...f & ...f'} = + todo -- > sortByColumns(students, ["age"]) -- | name | age | favorite color | -- | ------- | --- | -------------- | @@ -574,7 +585,7 @@ def removeDuplicatesFromSortedWithCounts {n a} [Eq a] (xs:n=>a) : List (a & Int) reduce (AsList 0 []) mergeUniqueSortedListsWithCounts xlists def count {n a f} [Ord a] (c:Label) (t:n=>{@c:a & ...f}) : List {value:a & count:Int} = - c_vals = get_column c t + c_vals = getColumn c t sorted_c_vals = sort c_vals (AsList _ distinct_vals) = removeDuplicatesFromSortedWithCounts sorted_c_vals AsList _ for i. 
@@ -609,7 +620,7 @@ def count {n a f} [Ord a] (c:Label) (t:n=>{@c:a & ...f}) : List {value:a & count def bin {m f} (c:Label) (t:m=>{@c:Int & ...f}) (n:Int) : List {lower:Int & upper:Int & count:Int} = -- Can only handle positive integers. - c_vals = get_column c t + c_vals = getColumn c t binned = for i. rem c_vals.i n (AsList _ distinct_vals) = removeDuplicatesFromSortedWithCounts binned AsList _ for i. @@ -673,21 +684,70 @@ def pivotTable {n a f f'} (c:Label) (t:n=>{@c:a & ...f}) -- | true | true | 0 | 0 | -'### TODO: `groupBy` (groupBy) +'### `groupBy` (groupBy) Groups the rows of a table according to a specified key selector function and creates a result value from each group and its key. The rows of each group are projected by using a specified function. +def mergeUniqueSortedListsWithAux {a b} [Eq a] + (xlist:List (a & List b)) + (ylist:List (a & List b)) : List (a & List b) = + -- This function is associative, for use in a monoidal reduction. + -- Assumes all xs are <= all ys. + -- The element at the end of xs might equal the + -- element at the beginning of ys. If so, this + -- function removes the duplicate when concatenating the lists, + -- and appends the two lists of bs. + (AsList nx xs) = xlist + (AsList _ ys) = ylist + case last xs of + Nothing -> ylist + Just (last_x, x_b_list) -> case first ys of + Nothing -> xlist + Just (first_y, y_b_list) -> case last_x == first_y of + False -> concat [xlist, ylist] + True -> + concat [allExceptLast xs, + (AsList 1 [(last_x, concat [x_b_list, y_b_list])]), + unsafeAllExceptFirst ys] + +def removeDuplicatesFromSortedWithAux {n a b} [Eq a] (xs:n=>(a & b)) : List (a & List b) = + xlists = for i:n. 
+ (xa, xb) = xs.i + (AsList 1 [(xa, AsList 1 [xb])]) + reduce (AsList 0 []) mergeUniqueSortedListsWithAux xlists + + + -- TODO: Write this out to flatten out key and row' as records in the output -def groupBy {n row row' key value} +def groupBy {n row row' key value} [Eq key, Eq n, Ord key] (t:n=>row) (getKey:row -> key) (project:row -> value) (aggregate:key -> List value -> row') : List (key & row') = - todo + keys = for i. getKey t.i + sortedixs = argsort keys + sortedkeyvals = for i. + (keys.(sortedixs.i), project t.(sortedixs.i)) + (AsList _ distinct_keys) = removeDuplicatesFromSortedWithAux sortedkeyvals + AsList _ for j. + (curkey, val_list) = distinct_keys.j + (curkey, aggregate curkey val_list) + + +def colorTemp {f} ({favColor, ...}:{favColor:String & ...f}) : String = + case favColor == "red" of + True -> "warm" + False -> "cool" + +def nameLength {f} ({name, ...}:{name:String & ...f}) : Int = listLength name +def aggregate {a} (key:a) ((AsList _ vs):List Int) : ({key:a & average:Float}) = + {key, average=mean (map IToF vs)} + +-- This has two columns for "key", not sure what was meant by original semantics. +:p groupBy students colorTemp nameLength aggregate --- -- > colorTemp = -- function(r): -- if getValue(r, "favorite color") == "red": @@ -709,6 +769,16 @@ def groupBy {n row row' key value} -- | ------ | ------- | -- | "warm" | 3 | -- | "cool" | 4 | + +def abstractAge {f} ({age, ...}:{age:Int & ...f}) : String = + case age <= 12 of + True -> "kid" + False -> case age <= 19 of + True -> "teenager" + False -> "adult" + +:p groupBy gradebook abstractAge (\{final, ...}. final) aggregate + -- > abstractAge = -- function(r): -- if (getValue(r, "age") <= 12): diff --git a/tests/sort-tests.dx b/tests/sort-tests.dx index b4dce0549..4163257f5 100644 --- a/tests/sort-tests.dx +++ b/tests/sort-tests.dx @@ -58,7 +58,9 @@ example = [5, 4, 3, 2, 1, 100, 1000, 10] -- Argsort gives a perumatation that sorts the array. argsorted = argsort example :p isSorted for i. 
example.(argsorted.i) +> True -- Argsort of argsort gives a permutation that un-sorts the array. rank = argsort $ argsort example :p all for i. example.i == (sort example).(rank.i) +> True