Skip to content

Gmean, Hmean expanded for other datatypes #956

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Aug 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 19 additions & 44 deletions integration_tests/test_statistics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from statistics import (mean, fmean, geometric_mean, harmonic_mean,
variance, stdev, covariance, correlation)
variance, stdev)
from ltypes import i32, f64, i64

eps: f64
Expand Down Expand Up @@ -45,13 +45,31 @@ def test_geometric_mean():
k = geometric_mean(c)
assert abs(k - 1.8171205928321397) < eps

d: list[f64]
d = [1.1, 3.4, 17.982, 11.8]
l: f64
l = geometric_mean(d)
assert abs(l - 5.307596520524432) < eps

def test_harmonic_mean():
    """
    Exercise harmonic_mean on positive integers, on integers that
    include a zero, and on floating-point data.
    """
    # Case 1: strictly positive integers.
    ints: list[i32]
    ints = [9, 2, 46]
    hm_ints: f64
    hm_ints = harmonic_mean(ints)
    assert abs(hm_ints - 4.740458015267175) < eps

    # Case 2: a zero anywhere in the data forces the result to 0.0.
    with_zero: list[i32]
    with_zero = [9, 0, 46]
    hm_zero: f64
    hm_zero = harmonic_mean(with_zero)
    assert hm_zero == 0.0

    # Case 3: floating-point input.
    floats: list[f64]
    floats = [1.1, 3.4, 17.982, 11.8]
    hm_floats: f64
    hm_floats = harmonic_mean(floats)
    assert abs(hm_floats - 2.977152988015106) < eps


def test_variance():
a: list[i32]
Expand All @@ -66,47 +84,6 @@ def test_variance():
k = variance(b)
assert abs(k - 0.40924) < eps

def test_covariance():
    """
    Exercise covariance on a pair of integer lists and on a pair of
    floating-point lists.
    """
    # Integer inputs.
    xs_int: list[i32]
    xs_int = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    ys_int: list[i32]
    ys_int = [1, 2, 3, 1, 2, 3, 1, 2, 3]
    cov_int: f64
    cov_int = covariance(xs_int, ys_int)
    assert abs(cov_int - 0.75) < eps

    # Floating-point inputs; the expected covariance is negative, hence
    # the addition inside abs().
    xs_flt: list[f64]
    xs_flt = [2.74, 1.23, 2.63, 2.22, 3.0, 1.98]
    ys_flt: list[f64]
    ys_flt = [9.4, 1.23, 2.63, 22.4, 1.9, 13.98]
    cov_flt: f64
    cov_flt = covariance(xs_flt, ys_flt)
    assert abs(cov_flt + 0.24955999999999934) < eps

def test_correlation():
    """
    Exercise correlation on weakly correlated data, on a list paired with
    itself, and on a list paired with its reverse.
    """
    # Weakly correlated integer samples.
    first: list[i32]
    first = [11, 2, 7, 4, 15, 6, 10, 8, 9, 1, 11, 5, 13, 6, 15]
    second: list[i32]
    second = [2, 5, 17, 6, 10, 8, 13, 4, 6, 9, 11, 2, 5, 4, 7]

    weak: f64
    weak = correlation(first, second)
    assert abs(weak - 0.11521487988958108) < eps

    # An ascending run and its mirror image.
    ascending: list[i32]
    ascending = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    descending: list[i32]
    descending = [9, 8, 7, 6, 5, 4, 3, 2, 1]

    # A sequence against itself must give exactly 1.0.
    perfect: f64
    perfect = correlation(ascending, ascending)
    assert perfect == 1.0

    # A sequence against its reverse must give exactly -1.0.
    inverse: f64
    inverse = correlation(ascending, descending)
    assert inverse == -1.0


def test_stdev():
a: list[i32]
a = [1, 2, 3, 4, 5]
Expand All @@ -128,7 +105,5 @@ def check():
test_fmean()
test_variance()
test_stdev()
test_covariance()
test_correlation()

# Run the test driver as soon as the module is executed.
check()
185 changes: 84 additions & 101 deletions src/runtime/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def fmean(x: list[f32]) -> f64:
"""
return mean(x)


@overload
def geometric_mean(x: list[i32]) -> f64:
"""
Returns the geometric mean of a data sequence of numbers
Expand All @@ -115,11 +115,51 @@ def geometric_mean(x: list[i32]) -> f64:
i: i32

for i in range(k):
if x[i] <= 0:
raise Exception("geometric mean requires a non-empty dataset containing positive numbers")
product *= float(x[i])

return product**(1/k)

@overload
def geometric_mean(x: list[i64]) -> f64:
    """
    Compute the geometric mean of a list of 64-bit integers.

    Returns 0.0 for an empty list (despite the wording of the error
    message below, emptiness is not treated as an error here).
    Raises an Exception as soon as an element that is zero or negative
    is encountered.
    """
    n: i32 = len(x)
    if n == 0:
        return 0.0

    # Running product of the elements, accumulated in floating point.
    acc: f64
    acc = 1.0
    idx: i32
    for idx in range(n):
        if x[idx] <= 0:
            raise Exception("geometric mean requires a non-empty dataset containing positive numbers")
        acc = acc * float(x[idx])

    # The n-th root of the product is the geometric mean.
    return acc ** (1 / n)

@overload
def geometric_mean(x: list[f64]) -> f64:
    """
    Compute the geometric mean of a list of 64-bit floats.

    Returns 0.0 for an empty list (despite the wording of the error
    message below, emptiness is not treated as an error here).
    Raises an Exception as soon as an element that is zero or negative
    is encountered.
    """
    n: i32 = len(x)
    if n == 0:
        return 0.0

    # Running product of the elements.
    acc: f64
    acc = 1.0
    idx: i32
    for idx in range(n):
        if x[idx] <= 0.0:
            raise Exception("geometric mean requires a non-empty dataset containing positive numbers")
        acc = acc * x[idx]

    # The n-th root of the product is the geometric mean.
    return acc ** (1 / n)

@overload
def harmonic_mean(x: list[i32]) -> f64:
"""
Returns the harmonic mean of a data sequence of numbers
Expand All @@ -134,6 +174,49 @@ def harmonic_mean(x: list[i32]) -> f64:
for i in range(k):
if x[i] == 0:
return 0.0
if x[i] < 0.0:
raise Exception("Harmonic mean does not support negative values")
sum += 1 / x[i]

return float(k/sum)

@overload
def harmonic_mean(x: list[i64]) -> f64:
    """
    Compute the harmonic mean of a list of 64-bit integers.

    Returns 0.0 for an empty list, and also as soon as a zero element is
    reached. Raises an Exception when a negative element is reached.
    Elements are examined in order, so whichever of these conditions
    occurs first decides the outcome.
    """
    n: i32 = len(x)
    if n == 0:
        return 0.0

    # Sum of reciprocals; named so that it does not shadow builtin sum().
    recip_total: f64
    recip_total = 0.0
    idx: i32
    for idx in range(n):
        if x[idx] == 0:
            return 0.0
        if x[idx] < 0 :
            raise Exception("Harmonic mean does not support negative values")
        recip_total = recip_total + 1 / x[idx]

    # Element count divided by the reciprocal sum.
    return n / recip_total

@overload
def harmonic_mean(x: list[f64]) -> f64:
    """
    Compute the harmonic mean of a list of 64-bit floats.

    Returns 0.0 for an empty list, and also as soon as a zero element is
    reached. Raises an Exception when a negative element is reached.
    Elements are examined in order, so whichever of these conditions
    occurs first decides the outcome.
    """
    n: i32 = len(x)
    if n == 0:
        return 0.0

    # Sum of reciprocals; named so that it does not shadow builtin sum().
    recip_total: f64
    recip_total = 0.0
    idx: i32
    for idx in range(n):
        if x[idx] == 0.0:
            return 0.0
        if x[idx] < 0.0:
            raise Exception("Harmonic mean does not support negative values")
        recip_total = recip_total + 1 / x[idx]

    # Element count divided by the reciprocal sum.
    return n / recip_total
Expand Down Expand Up @@ -189,103 +272,3 @@ def stdev(x: list[i32]) -> f64:
"""
return variance(x)**0.5

@overload
def covariance(x: list[i32], y: list[i32]) -> f64:
    """
    Compute the sample covariance of two integer lists.

    Both lists must have the same length, and that length must be at
    least two; otherwise an Exception is raised. The sum of products of
    deviations from the means is divided by (n - 1).
    """
    n: i32 = len(x)
    m: i32 = len(y)
    # Once the lengths are known to match, checking one of them against
    # the minimum covers both.
    if n != m or n < 2:
        raise Exception("Both inputs must be of the same length (no less than two)")

    mean_x: f64 = mean(x)
    mean_y: f64 = mean(y)

    # Accumulate the products of paired deviations from the means.
    cross_sum: f64
    cross_sum = 0.0
    idx: i32
    for idx in range(n):
        cross_sum = cross_sum + (x[idx] - mean_x) * (y[idx] - mean_y)

    return cross_sum / (n - 1)

@overload
def covariance(x: list[f64], y: list[f64]) -> f64:
    """
    Compute the sample covariance of two floating-point lists.

    Both lists must have the same length, and that length must be at
    least two; otherwise an Exception is raised. The sum of products of
    deviations from the means is divided by (n - 1).
    """
    n: i32 = len(x)
    m: i32 = len(y)
    # Once the lengths are known to match, checking one of them against
    # the minimum covers both.
    if n != m or n < 2:
        raise Exception("Both inputs must be of the same length (no less than two)")

    mean_x: f64 = mean(x)
    mean_y: f64 = mean(y)

    # Accumulate the products of paired deviations from the means.
    cross_sum: f64
    cross_sum = 0.0
    idx: i32
    for idx in range(n):
        cross_sum = cross_sum + (x[idx] - mean_x) * (y[idx] - mean_y)

    return cross_sum / (n - 1)

@overload
def correlation(x: list[i32], y: list[i32]) -> f64:
    """
    Return the Pearson's correlation coefficient for two integer inputs.

    The coefficient is sxy / sqrt(sxx * syy), where sxy is the sum of
    products of paired deviations from the means and sxx, syy are the
    sums of squared deviations of x and y respectively.

    Raises an Exception when the inputs differ in length, when they hold
    fewer than two data points, or when the denominator is zero (at
    least one input is constant).
    """
    n: i32 = len(x)
    m: i32 = len(y)
    if n != m:
        raise Exception("correlation requires that both inputs have same number of data points")
    if n < 2:
        raise Exception("correlation requires at least two data points")
    xmean: f64 = mean(x)
    ymean: f64 = mean(y)

    # All three sums are gathered in a single pass; each accumulator
    # still receives the same terms in the same order as before.
    sxy: f64 = 0.0
    sxx: f64 = 0.0
    syy: f64 = 0.0
    i: i32
    for i in range(n):
        sxy += (x[i] - xmean) * (y[i] - ymean)
        sxx += (x[i] - xmean) ** 2
        syy += (y[i] - ymean) ** 2

    # Evaluate the denominator once and reuse it for both the
    # constant-input check and the final division.
    denom: f64 = sqrt(sxx * syy)
    if denom == 0:
        raise Exception('at least one of the inputs is constant')
    return sxy / denom

@overload
def correlation(x: list[f64], y: list[f64]) -> f64:
    """
    Return the Pearson's correlation coefficient for two float inputs.

    The coefficient is sxy / sqrt(sxx * syy), where sxy is the sum of
    products of paired deviations from the means and sxx, syy are the
    sums of squared deviations of x and y respectively.

    Raises an Exception when the inputs differ in length, when they hold
    fewer than two data points, or when the denominator is zero (at
    least one input is constant).
    """
    n: i32 = len(x)
    m: i32 = len(y)
    if n != m:
        raise Exception("correlation requires that both inputs have same number of data points")
    if n < 2:
        raise Exception("correlation requires at least two data points")
    xmean: f64 = mean(x)
    ymean: f64 = mean(y)

    # All three sums are gathered in a single pass; each accumulator
    # still receives the same terms in the same order as before.
    sxy: f64 = 0.0
    sxx: f64 = 0.0
    syy: f64 = 0.0
    i: i32
    for i in range(n):
        sxy += (x[i] - xmean) * (y[i] - ymean)
        sxx += (x[i] - xmean) ** 2
        syy += (y[i] - ymean) ** 2

    # Evaluate the denominator once and reuse it for both the
    # constant-input check and the final division.
    denom: f64 = sqrt(sxx * syy)
    if denom == 0:
        raise Exception('at least one of the inputs is constant')
    return sxy / denom