%%html
<script src="https://bits.csb.pitt.edu/asker.js/lib/asker.js"></script>
<style>
.reveal pre { font-size: 100%; overflow-x: auto; overflow-y: auto;}
.reveal h1 { font-size: 2em}
.reveal ol, .reveal dl, .reveal ul { display: block}
.jp-OutputArea-output { padding: 0; }
</style>

<script>
$3Dmolpromise = new Promise((resolve, reject) => { 
    require(['https://3dmol.org/build/3Dmol-nojquery.js'], function(){       
            resolve();});
});
require(['https://cdnjs.cloudflare.com/ajax/libs/Chart.js/2.2.2/Chart.js'], function(Ch){
 Chart = Ch;
});

$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');


//the callback is provided a canvas object and data 
var chartmaker = function(canvas, labels, data) {
  var ctx = $(canvas).get(0).getContext("2d");
     var dataset = {labels: labels,                     
    datasets:[{
     data: data,
     backgroundColor: "rgba(150,64,150,0.5)",
         fillColor: "rgba(150,64,150,0.8)",    
  }]};
  var myBarChart = new Chart(ctx,{type:'bar',data:dataset,options:{legend: {display:false},
        scales: {
            yAxes: [{
                ticks: {
                    min: 0,
                }
            }]}}});
};

$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


%%html
<div id="pythonfam" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#pythonfam';
	jQuery(divid).asker({
	    id: divid,
	    question: "How familiar are you with Python?",
        answers: ['Novice','Beginner','Intermediate','Advanced'],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


!wget http://bits.csb.pitt.edu/files/sol.npz

--2022-11-04 11:01:25--  http://bits.csb.pitt.edu/files/sol.npz
Resolving bits.csb.pitt.edu (bits.csb.pitt.edu)... 136.142.4.139
Connecting to bits.csb.pitt.edu (bits.csb.pitt.edu)|136.142.4.139|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3173890 (3.0M)
Saving to: ‘sol.npz’

sol.npz             100%[===================>]   3.03M  --.-KB/s    in 0.05s   

2022-11-04 11:01:25 (62.9 MB/s) - ‘sol.npz’ saved [3173890/3173890]


import numpy as np
with np.load('sol.npz') as data:
    X = data['X']
    y = data['y']
X.shape

(387, 1024)


X[0]

array([0., 0., 0., ..., 0., 0., 0.])


import matplotlib.pyplot as plt
plt.hist(y);


%%html
<div id="crcclasstype" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#crcclasstype';
	jQuery(divid).asker({
	    id: divid,
	    question: "What sort of problem is this??",
        answers: ['Classification','Regression','Unsupervised'],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


import sklearn


X.shape,y.shape

((387, 1024), (387,))


%%html
<div id="crcwshape" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#crcwshape';
	jQuery(divid).asker({
	    id: divid,
	    question: "What is the shape of w?",
        answers: ['387','1024','(378,1024)','(1024,387)',"I've never taken matrix algebra"],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


from sklearn import linear_model
model = linear_model.LinearRegression() # create the model
model.fit(X,y) # fit the model to the data
p = model.predict(X) # make predictions with the model


plt.scatter(y,p); plt.xlabel('True'); plt.ylabel('Predict');


ylabel = y > -1
plabel = p > -1


from sklearn.metrics import * #pull in accuracy score, amount other things
accuracy_score(ylabel, plabel)

0.9431524547803618


print(confusion_matrix(ylabel,plabel))

[[200  12]
 [ 10 165]]


print(np.array([['TN', 'FP'],['FN','TP']]))

[['TN' 'FP']
 ['FN' 'TP']]


print(classification_report(ylabel,plabel))

              precision    recall  f1-score   support

       False       0.95      0.94      0.95       212
        True       0.93      0.94      0.94       175

    accuracy                           0.94       387
   macro avg       0.94      0.94      0.94       387
weighted avg       0.94      0.94      0.94       387


%%html
<div id="crcconfq" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#crcconfq';
	jQuery(divid).asker({
	    id: divid,
	    question: "What would the recall be if our classifer predicted everything as true?",
        answers: ['0','81/306','0.5','1.0'],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


fpr, tpr, thresholds = roc_curve(ylabel, p)  #not using rounded values
plt.plot(fpr,tpr,linewidth=4,clip_on=False)
plt.xlabel("FPR"); plt.ylabel("TPR")
plt.gca().set_aspect('equal')
plt.ylim(0,1); plt.xlim(0,1); plt.show()


randp = np.random.permutation(p)
fpr, tpr, thresholds = roc_curve(ylabel, randp)  
plt.plot(fpr,tpr,linewidth=3); plt.xlabel("FPR"); plt.ylabel("TPR"); plt.ylim(0,1); plt.xlim(0,1)
plt.gca().set_aspect('equal')

print(roc_auc_score(ylabel,p))

0.9800808625336928


np.sqrt(mean_squared_error(y,p))

0.2666424672444869


%%html
<div id="crccrossq" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#crccrossq';
	jQuery(divid).asker({
	    id: divid,
	    question: "In 5-fold cross validation, on average, how many times will a given example be in the training set?",
        answers: ['0','1','2.5','4','5'],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


from sklearn.model_selection import KFold
kf = KFold(n_splits=5)
accuracies = []; rocs = []
for train,test in kf.split(X): # these are arrays of indices
    model = linear_model.LinearRegression() 
    model.fit(X[train],y[train]) #slice out the training folds
    p = model.predict(X[test]) #slice out the test fold
    accuracies.append(accuracy_score(ylabel[test],p > -1))
    fpr, tpr, thresholds = roc_curve(ylabel[test], p)  
    rocs.append( (fpr,tpr, roc_auc_score(ylabel[test], p)))


print(accuracies)
print("Average accuracy:",np.mean(accuracies))

[0.6153846153846154, 0.7692307692307693, 0.6493506493506493, 0.6883116883116883, 0.6753246753246753]
Average accuracy: 0.6795204795204794


np.count_nonzero(ylabel==0)/float(len(ylabel))

0.5478036175710594


for roc in rocs:
    plt.plot(roc[0],roc[1],label="AUC %.2f" %roc[2]); 
plt.gca().set_aspect('equal')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.ylim(0,1); plt.xlim(0,1); plt.legend(loc='best'); plt.show()


from sklearn.model_selection import cross_validate

cross_validate(linear_model.LinearRegression(),X,y > -1,cv=5,scoring=['roc_auc'])

{'fit_time': array([0.06256986, 0.06139326, 0.08827758, 0.10639787, 0.07084489]),
 'score_time': array([0.00213408, 0.00225043, 0.00306988, 0.00306296, 0.00292492]),
 'test_roc_auc': array([0.51513158, 0.40972222, 0.47027027, 0.63222222, 0.62820513])}


%%html
<div id="crchowgood" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#crchowgood';
	jQuery(divid).asker({
	    id: divid,
	    question: "How good is the predictiveness of our model?",
        answers: ['A','B','C','D'],
		extra: ['Still Perfect','Not perfect, but still good','Not great, but better than random','Horrible'],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


kf = KFold(n_splits=5)
accuracies = []; rocs = []
for train,test in kf.split(X): # these are arrays of indices
    model = linear_model.Lasso(alpha=0.005) 
    model.fit(X[train],y[train]) #slice out the training folds
    p = model.predict(X[test]) #slice out the test fold
    accuracies.append(accuracy_score(ylabel[test],p > 0))
    fpr, tpr, thresholds = roc_curve(ylabel[test], p)  
    rocs.append( (fpr,tpr, roc_auc_score(ylabel[test],p)))


print(accuracies)
print("Average accuracy:",np.mean(accuracies))

[0.6666666666666666, 0.7948717948717948, 0.6493506493506493, 0.8051948051948052, 0.7402597402597403]
Average accuracy: 0.7312687312687312


for roc in rocs:
    plt.plot(roc[0],roc[1],label="AUC %.2f" %roc[2]); 
plt.gca().set_aspect('equal')    
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.ylim(0,1); plt.xlim(0,1); plt.legend(loc='best'); plt.show()


linmodel = linear_model.LinearRegression() 
linmodel.fit(X,y) 
lassomodel = linear_model.Lasso(alpha=0.005) 
lassomodel.fit(X,y);


print("Nonzero coefficients in linear:",np.count_nonzero(linmodel.coef_))
print("Nonzero coefficients in LASSO:",np.count_nonzero(lassomodel.coef_))

Nonzero coefficients in linear: 881
Nonzero coefficients in LASSO: 64


from sklearn import model_selection
#setup grid search with default 5-fold CV and scoring
searcher = model_selection.GridSearchCV(linear_model.Lasso(max_iter=10000), {'alpha': [0.001,0.005,0.01,0.1]})
searcher.fit(X,y)
searcher.best_params_

{'alpha': 0.005}


lassomodel = linear_model.LassoCV(n_jobs=8,max_iter=10000)
lassomodel.fit(X,y)

LassoCV(max_iter=10000, n_jobs=8)

LassoCV(max_iter=10000, n_jobs=8)


lassomodel.alpha_

0.00455203947849711


from sklearn import svm
kf = KFold(n_splits=5)
accuracies = []; rocs = []; badrocs = []
for train,test in kf.split(X): # these are arrays of indices
    model = svm.SVC(probability=True) 
    model.fit(X[train],ylabel[train]) #slice out the training folds
    p = model.predict(X[test]) # prediction (0 or 1)
    probs = model.predict_proba(X[test])[:,1] #probability of being 1
    accuracies.append(accuracy_score(ylabel[test],p))
    fpr, tpr, thresholds = roc_curve(ylabel[test], probs)  
    rocs.append( (fpr,tpr, roc_auc_score(ylabel[test],probs)))
    fpr, tpr, thresholds = roc_curve(ylabel[test], p)  
    badrocs.append( (fpr,tpr, roc_auc_score(ylabel[test],p)))


print(accuracies)
print("Average accuracy:",np.mean(accuracies))

[0.6923076923076923, 0.7307692307692307, 0.6883116883116883, 0.7922077922077922, 0.6753246753246753]
Average accuracy: 0.7157842157842158


for roc in rocs:
    plt.plot(roc[0],roc[1],label="AUC %.2f" %roc[2]); 
plt.gca().set_aspect('equal')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.ylim(0,1); plt.xlim(0,1); plt.legend(loc='best'); plt.show()


for roc in badrocs:
    plt.plot(roc[0],roc[1],label="AUC %.2f" %roc[2]); 
plt.gca().set_aspect('equal')
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.ylim(0,1); plt.xlim(0,1); plt.legend(loc='best'); plt.show()


from sklearn import model_selection
searcher = model_selection.GridSearchCV(svm.SVC(), {'kernel': ['linear','rbf'],'C': [1,10,100,1000]},scoring='roc_auc',n_jobs=-1)
searcher.fit(X,ylabel)

GridSearchCV(estimator=SVC(), n_jobs=-1,
             param_grid={'C': [1, 10, 100, 1000], 'kernel': ['linear', 'rbf']},
             scoring='roc_auc')

GridSearchCV(estimator=SVC(), n_jobs=-1,
             param_grid={'C': [1, 10, 100, 1000], 'kernel': ['linear', 'rbf']},
             scoring='roc_auc')

SVC()

SVC()


print("Best AUC:",searcher.best_score_)
print("Parameters",searcher.best_params_)

Best AUC: 0.8149612403100776
Parameters {'C': 10, 'kernel': 'rbf'}


from sklearn import neighbors
kf = KFold(n_splits=5)
accuracies = []; rocs = []
for train,test in kf.split(X): # these are arrays of indices
    model = neighbors.KNeighborsClassifier() # defaults to k=5
    model.fit(X[train],ylabel[train]) #slice out the training folds
    p = model.predict(X[test]) # prediction (0 or 1)
    probs = model.predict_proba(X[test])[:,1] #probability of being 1
    accuracies.append(accuracy_score(ylabel[test],p))
    fpr, tpr, thresholds = roc_curve(ylabel[test], probs)  
    rocs.append( (fpr,tpr, roc_auc_score(ylabel[test],probs)))


print(accuracies)
print("Average accuracy:",np.mean(accuracies))

[0.7307692307692307, 0.6923076923076923, 0.7532467532467533, 0.7272727272727273, 0.7662337662337663]
Average accuracy: 0.733966033966034


for roc in rocs:
    plt.plot(roc[0],roc[1],label="AUC %.2f" %roc[2]); 
plt.gca().set_aspect('equal')    
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.ylim(0,1); plt.xlim(0,1); plt.legend(loc='best'); plt.show()


%%html
<div id="crcknnq" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#crcknnq';
	jQuery(divid).asker({
	    id: divid,
	    question: "What could <b>not</b> be a valid probability from the previous k-nn (k=5) model?",
        answers: ['0','.5','.6','1'],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


searcher = model_selection.GridSearchCV(neighbors.KNeighborsClassifier(), \
                {'n_neighbors': [1,2,3,4,5,10]},scoring='roc_auc',n_jobs=-1)
searcher.fit(X,ylabel);


print("Best AUC:",searcher.best_score_)
print("Parameters",searcher.best_params_)

Best AUC: 0.8024758740705586
Parameters {'n_neighbors': 5}


%%html
<div id="dtqml" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#dtqml';
	jQuery(divid).asker({
	    id: divid,
	    question: "Humidity is low, it's windy, and it is sunny. What do you do?",
		answers: ["Play","Don't Play"],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();
</script>


from sklearn import tree
kf = KFold(n_splits=5)
accuracies = []; rocs = []
for train,test in kf.split(X): # these are arrays of indices
    model = tree.DecisionTreeClassifier() 
    model.fit(X[train],ylabel[train]) #slice out the training folds
    p = model.predict(X[test]) # prediction (0 or 1)
    probs = model.predict_proba(X[test])[:,1] #probability of being 1
    accuracies.append(accuracy_score(ylabel[test],p))
    fpr, tpr, thresholds = roc_curve(ylabel[test], probs)  
    rocs.append( (fpr,tpr, roc_auc_score(ylabel[test],probs)))


print(accuracies)
print("Average accuracy:",np.mean(accuracies))

[0.7435897435897436, 0.7435897435897436, 0.6883116883116883, 0.7532467532467533, 0.6883116883116883]
Average accuracy: 0.7234099234099234


for roc in rocs:
    plt.plot(roc[0],roc[1],label="AUC %.2f" %roc[2]); 
plt.gca().set_aspect('equal')    
plt.xlabel("FPR"); plt.ylabel("TPR"); plt.ylim(0,1); plt.xlim(0,1); plt.legend(loc='best'); plt.show()


set(probs)

{0.0, 0.5, 1.0}


searcher = model_selection.GridSearchCV(tree.DecisionTreeClassifier(),  \
                {'max_depth': [1,2,3,4,5,10]},scoring='roc_auc',n_jobs=-1)
searcher.fit(X,ylabel);


print("Best AUC:",searcher.best_score_)
print("Parameters",searcher.best_params_)

Best AUC: 0.7510678690080684
Parameters {'max_depth': 10}


model = tree.DecisionTreeClassifier(max_depth=5).fit(X,ylabel)
set(model.predict_proba(X)[:,1])

{0.0,
 0.046153846153846156,
 0.15217391304347827,
 0.2391304347826087,
 0.5,
 0.6666666666666666,
 1.0}


!wget http://bits.csb.pitt.edu/files/sol.npz

--2022-11-04 11:01:58--  http://bits.csb.pitt.edu/files/sol.npz
Resolving bits.csb.pitt.edu (bits.csb.pitt.edu)... 136.142.4.139
Connecting to bits.csb.pitt.edu (bits.csb.pitt.edu)|136.142.4.139|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3173890 (3.0M)
Saving to: ‘sol.npz.1’

sol.npz.1           100%[===================>]   3.03M  --.-KB/s    in 0.05s   

2022-11-04 11:01:58 (60.9 MB/s) - ‘sol.npz.1’ saved [3173890/3173890]


from sklearn.model_selection import KFold
from sklearn.metrics import *
from sklearn import ensemble
kf = KFold(n_splits=5)
errors = []
predictions = []
for train,test in kf.split(X): # these are arrays of indices
    model = sklearn.ensemble.RandomForestRegressor() 
    model.fit(X[train],y[train]) #slice out the training folds
    p = model.predict(X[test]) # prediction (0 or 1)
    predictions += zip(y[test],p)
    errors.append(mean_squared_error(y[test],p))
predictions = np.array(predictions)


print(errors)
print("Average squared error",np.mean(errors))

[0.27703325338160617, 0.32727308855587467, 0.19780737467745002, 0.3881076294593794, 0.19431154514702692]
Average squared error 0.27690657824426745


import seaborn as sns
sns.jointplot(predictions[:,0],predictions[:,1])
plt.xlabel("Experiment"); plt.ylabel("Predicted")
np.corrcoef(predictions[:,0],predictions[:,1])[0][1]

0.9062512408216786


%%html
<div id="crcinand" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#crcinand';
	jQuery(divid).asker({
	    id: divid,
	    question: "What are the corresponding outputs for x = [0,0],[0,1],[1,0], and [1,1]?",
		answers: ["0,0,0,0","0,1,1,0","0,0,0,1","0,1,1,1","1,1,1,0"],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


plt.plot(x, x > 0,linewidth=1,clip_on=False);
plt.hlines(xmin=-10,xmax=0,y=0,linewidth=3,color='b')
plt.hlines(xmin=0,xmax=10,y=1,linewidth=3,color='b');


plt.plot(x, 1/(1+np.exp(-x)),linewidth=4,clip_on=False);
plt.plot(x, 1/(1+np.exp(-2*x)),linewidth=2,clip_on=False);
plt.plot(x, 1/(1+np.exp(-.5*x)),linewidth=2,clip_on=False);


plt.plot([-10,10],[0,0],'k--')
plt.plot(x, np.tanh(x),linewidth=4,clip_on=False);


plt.plot(x,x*(x > 0),clip_on=False,linewidth=4);


%%html
<div id="crcibpcnt" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#crcibpcnt';
	jQuery(divid).asker({
	    id: divid,
	    question: "A network has 10 input nodes, two hidden layers each with 10 neurons, and 10 output neurons.  How many parameters does training have to estimate?",
		answers: ["30","100","300","330","600"],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


%%html
<div id="crciweightcnt" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="https://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');

    var divid = '#crciweightcnt';
	jQuery(divid).asker({
	    id: divid,
	    question: "Which network has more parameters to learn?",
		answers: ["Classic","CNN"],
        server: "https://bits.csb.pitt.edu/asker.js/example/asker.cgi",
		charter: chartmaker})
    
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();


</script>


import torch  # note package is not called pytorch

T = torch.rand(3,4)
T

tensor([[0.3813, 0.8895, 0.3046, 0.4749],
        [0.8915, 0.8743, 0.0736, 0.9404],
        [0.6782, 0.1015, 0.2897, 0.2399]])


T.shape,T.dtype,T.device,T.requires_grad

(torch.Size([3, 4]), torch.float32, device(type='cpu'), False)


import torch.nn as nn
import torch.nn.functional as F


class MyNet(nn.Module):
    def __init__(self):  #initialize submodules here - this defines our network architecture
        super(MyNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1) 
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.fc1 = nn.Linear(2304, 10) #mystery X

    def forward(self, x): # this actually applies the operations
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)  # POOL    
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2) # POOL
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return x


from torchvision import datasets
train_data = datasets.MNIST(root='../data', train=True,download=True)
test_data = datasets.MNIST(root='../data', train=False)

/usr/local/lib/python3.10/dist-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory
  warn(f"Failed to load image Python extension: {e}")

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz

  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz

  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz

  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz

  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw


train_data[0]

(<PIL.Image.Image image mode=L size=28x28 at 0x7F3DAE7BB730>, 5)


train_data[0][0]


from torchvision import transforms
train_data = datasets.MNIST(root='../data', train=True,transform=transforms.ToTensor())
test_data = datasets.MNIST(root='../data', train=False,transform=transforms.ToTensor())


train_data[0][0]

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0118, 0.0706, 0.0706, 0.0706,
          0.4941, 0.5333, 0.6863, 0.1020, 0.6510, 1.0000, 0.9686, 0.4980,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.1176, 0.1412, 0.3686, 0.6039, 0.6667, 0.9922, 0.9922, 0.9922,
          0.9922, 0.9922, 0.8824, 0.6745, 0.9922, 0.9490, 0.7647, 0.2510,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1922,
          0.9333, 0.9922, 0.9922, 0.9922, 0.9922, 0.9922, 0.9922, 0.9922,
          0.9922, 0.9843, 0.3647, 0.3216, 0.3216, 0.2196, 0.1529, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0706,
          0.8588, 0.9922, 0.9922, 0.9922, 0.9922, 0.9922, 0.7765, 0.7137,
          0.9686, 0.9451, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.3137, 0.6118, 0.4196, 0.9922, 0.9922, 0.8039, 0.0431, 0.0000,
          0.1686, 0.6039, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0549, 0.0039, 0.6039, 0.9922, 0.3529, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.5451, 0.9922, 0.7451, 0.0078, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0431, 0.7451, 0.9922, 0.2745, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.1373, 0.9451, 0.8824, 0.6275,
          0.4235, 0.0039, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.3176, 0.9412, 0.9922,
          0.9922, 0.4667, 0.0980, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1765, 0.7294,
          0.9922, 0.9922, 0.5882, 0.1059, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0627,
          0.3647, 0.9882, 0.9922, 0.7333, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.9765, 0.9922, 0.9765, 0.2510, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1804, 0.5098,
          0.7176, 0.9922, 0.9922, 0.8118, 0.0078, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.1529, 0.5804, 0.8980, 0.9922,
          0.9922, 0.9922, 0.9804, 0.7137, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0941, 0.4471, 0.8667, 0.9922, 0.9922, 0.9922,
          0.9922, 0.7882, 0.3059, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0902, 0.2588, 0.8353, 0.9922, 0.9922, 0.9922, 0.9922, 0.7765,
          0.3176, 0.0078, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0706, 0.6706,
          0.8588, 0.9922, 0.9922, 0.9922, 0.9922, 0.7647, 0.3137, 0.0353,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.2157, 0.6745, 0.8863, 0.9922,
          0.9922, 0.9922, 0.9922, 0.9569, 0.5216, 0.0431, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.5333, 0.9922, 0.9922, 0.9922,
          0.8314, 0.5294, 0.5176, 0.0627, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000]]])


plt.imshow(train_data[0][0][0])

<matplotlib.image.AxesImage at 0x7f3dae3469b0>


#process 10 randomly sampled images at a time
train_loader = torch.utils.data.DataLoader(train_data,batch_size=10,shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data,batch_size=10,shuffle=False)

#instantiate our neural network and put it on the GPU
model = MyNet() #.to('cuda')


batch = next(iter(train_loader))
batch

[tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         ...,
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]]]),
 tensor([0, 0, 4, 6, 7, 7, 8, 9, 9, 0])]


output = model(batch[0]) #.to('cuda')) # model is on GPU, so must put input there too
output

tensor([[-0.0264, -0.0052,  0.0288, -0.0266, -0.0390,  0.0121,  0.0207,  0.1086,
         -0.0362,  0.0214],
        [ 0.0168,  0.0019,  0.0495, -0.0814, -0.0272,  0.0239,  0.0129,  0.0925,
         -0.0307,  0.0860],
        [-0.0115,  0.0235,  0.0038, -0.0747, -0.0348,  0.0084, -0.0043,  0.1093,
          0.0080,  0.0250],
        [ 0.0307,  0.0153,  0.0430, -0.0206, -0.0415,  0.0040, -0.0510,  0.0715,
         -0.0046,  0.0503],
        [-0.0024,  0.0223,  0.0629, -0.0737, -0.0219, -0.0181, -0.0311,  0.0714,
         -0.0224,  0.0602],
        [ 0.0164,  0.0389,  0.0460, -0.0791,  0.0218, -0.0165,  0.0123,  0.0649,
         -0.0527,  0.0471],
        [ 0.0376,  0.0146,  0.0585, -0.0502, -0.0367, -0.0097, -0.0007,  0.0520,
         -0.0421,  0.0640],
        [-0.0274,  0.0127,  0.0645, -0.0735, -0.0359, -0.0039, -0.0210,  0.0793,
         -0.0042,  0.0555],
        [ 0.0269,  0.0126,  0.0726, -0.0535, -0.0397,  0.0165, -0.0198,  0.0717,
         -0.0030,  0.0307],
        [ 0.0109,  0.0241,  0.0444, -0.0528, -0.0312,  0.0300, -0.0122,  0.1235,
         -0.0409,  0.0558]], grad_fn=<AddmmBackward0>)


loss = F.cross_entropy(output,batch[1]) #.to('cuda')) #combines log softmax and 
loss

tensor(2.3030, grad_fn=<NllLossBackward0>)


loss.backward() # sets grad, but does not change parameters of model


%%time
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001) # need to tell optimizer what it is optimizing

losses = []
for epoch in range(10):
    for i, (img,label) in enumerate(train_loader):
        optimizer.zero_grad()  # IMPORTANT!
        #img, label = img.to('cuda'), label.to('cuda')
        output = model(img)
        loss = F.cross_entropy(output, label)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

CPU times: user 36min 21s, sys: 2.32 s, total: 36min 23s
Wall time: 3min 2s


plt.plot(losses)

[<matplotlib.lines.Line2D at 0x7f3dae393e20>]


correct = 0
with torch.no_grad(): #no need for gradients - won't be calling backward to clear them
    for img, label in test_loader:
        #img, label = img.to('cuda'), label.to('cuda')
        output = F.softmax(model(img),dim=1)
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(label.view_as(pred)).sum().item()
        
print("Accuracy",correct/len(test_loader.dataset))

Accuracy 0.9711


Identity	Blur	Edge Detection

Machine Learning Workshop¶

11/4/2022¶

What is machine learning?¶

Unsupervised Learning¶

Supervised Learning¶

Labels¶

Classification¶

Regression¶

Features¶

Example¶

sklearn¶

Linear Model¶

Linear Model¶

Evaluating Predictions¶

Confusion matrix¶

Other measures¶

ROC Curves¶

AUC¶

Evaluating Predictions¶

Correct Model Evaluation¶

Cross Validation¶

K-Fold Cross Validation¶

Cross Validation Variations¶

Vocab¶

Cross Validation¶

Alternatively...¶

Generalization Error¶

LASSO¶

Lasso vs. LinearRegression¶

Model Parameter Optimization¶

Model specific optimization¶

Support Vector Machine (SVM)¶

SVM Kernels¶

Training SVM¶

Using class predictions instead of probabilities¶

Optimizing SVM¶

Nearest Neighbors (NN)¶

Training NN¶

Optimizing k-NN¶

Decision Trees¶

Random Forest¶

Training a Decision Tree¶

Optimizing a Decision Tree¶

Regression¶

Key Points¶

Which method works best?¶

Project¶

Deep Learning and PyTorch¶

Perceptron¶

Perceptron¶

Neurons¶

Activation Functions: Step (Perceptron)¶

Activation Functions: Sigmoid (Logistic)¶

Activation Functions: tanh¶

Activation Functions: ReLU¶

Networks¶

Networks¶

Neural Networks¶

Stochastic Gradient Descent¶

Loss Functions¶

Backpropagation¶

Backpropagation as the Chain Rule¶

Deep Learning¶

Convolutional Neural Nets¶

Convolution Filters¶

Feature Maps¶

Convolutional Layers¶

Convolutional Layers¶

Pooling¶

PyTorch¶

PyTorch Tensors¶

Modules vs Functional¶

A network is a module¶

MNIST¶

Training MNIST¶

Training MNIST¶

Training MNIST¶

Testing MNIST¶

Some Failures¶

Generative vs. Discriminative¶

`sklearn`¶

PyTorch ¶