### standard packages/modules
import math
import matplotlib.pyplot as plt
import numpy as np
import scipy
import pandas as pd


from sklearn.linear_model import LassoCV
from sklearn.linear_model import RidgeCV
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import lasso_path, enet_path

## read in data
# Remote CSV; running this section requires network access.
cd = pd.read_csv("http://www.rob-mcculloch.org/data/usedcars.csv")

## pull off y
# The response is the "price" column; the histogram is a quick look at its
# distribution.
y = cd["price"]
plt.hist(y)

## get x and make dummies
# Predictors are selected by column POSITION: columns 3 and 4 are used as-is,
# the remaining eight are passed through get_dummies.
# NOTE(review): presumably 3/4 are the numeric features and the others are
# categorical -- confirm against the CSV header.
xn = cd.iloc[:,[3,4]]
xf = cd.iloc[:,[1,2,5,6,7,8,9,10]]
# drop_first=True drops one level per factor so the dummy columns of a factor
# are not collinear with a constant.
xfd = pd.get_dummies(xf,drop_first=True)
xx = pd.concat([xn,xfd],axis=1)
# Ask for a float matrix explicitly: recent pandas emits boolean dummies, and
# a mixed numeric/bool frame would otherwise come back as an object array.
X = xx.to_numpy(dtype=float)

## train, test
# 80/20 split; the seeded RandomState makes the split reproducible.
rng = np.random.RandomState(34)
Xtrain, Xtest, ytrain, ytest = train_test_split(X,y,random_state=rng, test_size=.2)

## scale x's
# The scaler is fitted on the training rows only and then applied to both
# sets, so no test-set information leaks into the scaling.
# transform() already returns a new array, so no extra copy is needed.
scaler = preprocessing.StandardScaler().fit(Xtrain)
Xtrains = scaler.transform(Xtrain)
Xtests = scaler.transform(Xtest)

##check
# Print the checks so they are visible when the file is run as a script
# (bare expressions are only echoed in an interactive session).
# Expect means near 0 and standard deviations of 1 (0 for a constant column).
print(Xtrains.mean(axis=0))
print(Xtrains.std(axis=0))
# New figure so this sanity plot is not drawn over an earlier one; scaled vs.
# raw values of the first column should fall on a straight line.
plt.figure()
plt.scatter(Xtrains[:,0],Xtrain[:,0])


##LassoCV
# 5-fold cross-validated lasso on the standardized training predictors.
reg = LassoCV(cv=5, random_state=0).fit(Xtrains, ytrain)
print(reg.coef_) #coefficients

#yhat
# Test-set predictions against the observed values; the second scatter
# (ytest vs. ytest) draws the 45-degree reference line.
yhatte = reg.predict(Xtests)
plt.figure()
plt.scatter(ytest,yhatte)
plt.scatter(ytest,ytest)

#cv results
# mse_path_ has one row per alpha and one column per fold; average over the
# folds to get the cross-validated MSE for each alpha (a sum would be the MSE
# scaled by the number of folds).
msep = reg.mse_path_
msev = msep.mean(axis=1)


#plot alphas vs. mse
plt.figure()
plt.scatter(np.log(reg.alphas_),msev)

## lasso path
# eps is the ratio alpha_min / alpha_max of the automatically generated grid.
eps = 5e-3
# lasso_path takes eps by keyword and has no fit_intercept option: it fits no
# intercept itself.  The columns of Xtrains are already centered, so centering
# the response gives the coefficients of a fit with an intercept.
ytrain_centered = ytrain - np.mean(ytrain)
alphas_lasso, coefs_lasso, _ = lasso_path(Xtrains, ytrain_centered, eps=eps)

#plot lasso path
# coefs_lasso has one row per predictor and one column per alpha; iterate over
# the rows instead of hard-coding how many predictors there are.
plt.figure()
log_alphas = np.log(alphas_lasso)
for coef_path in coefs_lasso:
    plt.scatter(log_alphas, coef_path)


##RidgeCV
# 5-fold cross-validated ridge on the same standardized training predictors.
# NOTE: this rebinds `reg`; from here on it is the ridge fit, not the lasso.
reg = RidgeCV(cv=5).fit(Xtrains, ytrain)
print(reg.coef_) #coefficients
yhatR = reg.predict(Xtests)

# Ridge vs. lasso test-set predictions on a figure of their own; the thin red
# scatter (yhatte vs. yhatte) is the 45-degree reference line.
plt.figure()
plt.scatter(yhatR,yhatte,c="blue")
plt.scatter(yhatte,yhatte,s=.1,c="red")








