Skip to content

Keras models support #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ language: python
python:
- 3.6
- 3.7
- 3.8-dev

install:
- python setup.py -q install
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

# What packages are required for this module to be executed?
REQUIRED = [
'tensorflow==2.0.0-alpha0'
]
SETUP_REQUIRED = [
'pytest-runner'
Expand Down
1 change: 1 addition & 0 deletions sqlflow_models/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from ._version import __version__
from .dnnclassifier import DNNClassifier
42 changes: 42 additions & 0 deletions sqlflow_models/dnnclassifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import tensorflow as tf

class DNNClassifier(tf.keras.Model):
    """Feed-forward classifier over feature columns, built as a Keras model."""

    def __init__(self, feature_columns, hidden_units=None, n_classes=2):
        """Create the feature, hidden and prediction layers.

        :param feature_columns: feature columns.
        :type feature_columns: list[tf.feature_column].
        :param hidden_units: number of units in each hidden layer, one entry
            per layer. Defaults to ``[10, 10]`` when omitted.
        :type hidden_units: list[int].
        :param n_classes: number of label classes.
        :type n_classes: int.
        """
        super().__init__()

        # Avoid a mutable default argument; keep the historical default.
        if hidden_units is None:
            hidden_units = [10, 10]

        # combines all the data as a dense tensor
        self.feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
        # A non-linear activation is required here: Dense layers without one
        # are purely linear, so stacking them would collapse into a single
        # linear transform regardless of how many layers are requested.
        self.hidden_layers = [
            tf.keras.layers.Dense(units, activation='relu')
            for units in hidden_units
        ]
        self.prediction_layer = tf.keras.layers.Dense(n_classes, activation='softmax')

    def call(self, inputs):
        """Run the forward pass and return the softmax output of the last layer."""
        x = self.feature_layer(inputs)
        for hidden_layer in self.hidden_layers:
            x = hidden_layer(x)
        return self.prediction_layer(x)

    def default_optimizer(self):
        """Default optimizer instance. Used in model.compile."""
        # `learning_rate` is the documented argument; `lr` is a deprecated alias.
        return tf.keras.optimizers.Adagrad(learning_rate=0.1)

    def default_loss(self):
        """Default loss function. Used in model.compile."""
        return 'sparse_categorical_crossentropy'

    def default_training_epochs(self):
        """Default training epochs. Used in model.fit."""
        return 2
Copy link
Contributor

@tonyyang-svail tonyyang-svail May 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2 might be too small. Change to 50?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested that 2 is enough for the iris demo case. Changing it to 50 may take too long to train.


def prepare_prediction_column(self, prediction):
    """Pick, along the last axis, the index of the largest predicted value.

    NOTE(review): assumes `prediction` is a NumPy-like array exposing
    ``argmax`` -- confirm against the caller.
    """
    best_class = prediction.argmax(axis=-1)
    return best_class
31 changes: 31 additions & 0 deletions tests/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import unittest
import tensorflow as tf

def train_input_fn(features, labels, batch_size=32):
    """Build the training dataset from in-memory features and labels.

    The (features, labels) pairs are sliced into a tf.data.Dataset, shuffled
    with a buffer of 1000, repeated indefinitely and grouped into batches of
    `batch_size`.
    """
    slices = (dict(features), labels)
    pipeline = tf.data.Dataset.from_tensor_slices(slices)
    return pipeline.shuffle(1000).repeat().batch(batch_size)

def eval_input_fn(features, labels, batch_size=32):
    """Build the evaluation dataset from in-memory features and labels.

    The (features, labels) pairs are sliced into a tf.data.Dataset and grouped
    into batches of `batch_size`, with no shuffling or repetition.
    """
    slices = (dict(features), labels)
    return tf.data.Dataset.from_tensor_slices(slices).batch(batch_size)

class BaseTestCases:
    """Wrapper namespace around the shared base test.

    NOTE(review): nesting presumably keeps the base test itself from being
    collected by test runners -- confirm.
    """

    class BaseTest(unittest.TestCase):
        """Shared train/evaluate check for model classes.

        Subclasses override ``setUp`` to provide ``self.model``,
        ``self.features`` and ``self.label``. Deriving from
        ``unittest.TestCase`` lets both ``unittest.main()`` and pytest
        discover the subclasses and call ``setUp`` automatically.
        """

        def setUp(self):
            # Placeholders; concrete test classes must override setUp.
            self.model, self.features, self.label = None, {}, None

        def test_train_and_predict(self):
            """Compile, fit and evaluate the model using its own defaults."""
            self.model.compile(optimizer=self.model.default_optimizer(),
                               loss=self.model.default_loss(),
                               metrics=["accuracy"])
            # The training dataset repeats forever, so steps_per_epoch bounds
            # each epoch.
            self.model.fit(train_input_fn(self.features, self.label),
                           epochs=self.model.default_training_epochs(),
                           steps_per_epoch=100, verbose=0)
            loss, acc = self.model.evaluate(eval_input_fn(self.features, self.label))
            print(loss, acc)
            # Loose sanity bounds rather than exact targets.
            self.assertLess(loss, 10)
            self.assertGreater(acc, 0.3)
22 changes: 22 additions & 0 deletions tests/test_dnnclassifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import sqlflow_models
from .base import BaseTestCases

import tensorflow as tf
import unittest


class TestDNNClassifier(BaseTestCases.BaseTest):
    """Exercise DNNClassifier on a small synthetic two-class dataset."""

    def setUp(self):
        # Four numeric columns, each holding the values 0.0 .. 99.0.
        column_names = ("c1", "c2", "c3", "c4")
        self.features = {
            name: [float(value) for value in range(100)]
            for name in column_names
        }
        # First 50 rows are class 0, the remaining 50 rows are class 1.
        self.label = [0] * 50 + [1] * 50
        feature_columns = [
            tf.feature_column.numeric_column(name) for name in self.features
        ]
        self.model = sqlflow_models.DNNClassifier(feature_columns=feature_columns)


# Hand control to unittest's command-line runner when executed as a script.
if __name__ == '__main__':
    unittest.main()