Thursday, May 6, 2021

Logistic Regression weights becoming very large negative numbers

I am trying to use a logistic regression code snippet from Python Machine Learning, 3rd ed., but the weights in my model become very large negative numbers, which means every prediction is the same (0). I assume the error is in my data-preparation code, but I cannot figure out what is going on. What could be causing my model's weights to grow so exceedingly large?

Logistic Regression:

'''

import numpy as np

class LogisticRegression():
    def __init__(self, learning_rate, n_iter, random_state):
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        randomGenerator = np.random.RandomState(self.random_state)
        self.weights_ = randomGenerator.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            net_input = self.net_input(X)
            output = self.activation(net_input)
            errors = (y - output)
            # update weights
            self.weights_[1:] += self.learning_rate * X.T.dot(errors)
            # update bias
            self.weights_[0] += self.learning_rate * errors.sum()
            # compute logistic cost function
            cost = (-y.dot(np.log(output)) - ((1 - y).dot(np.log(1 - output))))
            self.cost_.append(cost)
        return self

    def net_input(self, X):
        return np.dot(X, self.weights_[1:]) + self.weights_[0]

    def activation(self, z):
        # sigmoid, clipped to avoid overflow in np.exp
        return 1. / (1. + np.exp(-np.clip(z, -250, 250)))

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, 0)

'''
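The update inside fit is batch gradient descent on the logistic (cross-entropy) cost, which is derived for labels in {0, 1}. As a minimal sanity check of the class itself, here is a run on a tiny hand-made dataset (the toy arrays and the expected results in the comments are my own illustration, not from the book):

'''

import numpy as np

# Toy, linearly separable data with 0/1 labels (invented for illustration)
X_toy = np.array([[0.1], [0.2], [0.8], [0.9]])
y_toy = np.array([0, 0, 1, 1])

clf = LogisticRegression(learning_rate=.05, n_iter=1000, random_state=7)
clf.fit(X_toy, y_toy)
print('weights:', clf.weights_)                          # should stay moderate
print('cost start / end:', clf.cost_[0], clf.cost_[-1])  # should decrease
print('predictions:', clf.predict(X_toy))                # expect [0 0 1 1]

'''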

Data Preparation:

'''

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

X, y = load_iris(return_X_y=True)
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=7)

lr1 = LogisticRegression(learning_rate=.05, n_iter=1000, random_state=7)
lr2 = LogisticRegression(learning_rate=.05, n_iter=1000, random_state=7)
lr3 = LogisticRegression(learning_rate=.05, n_iter=1000, random_state=7)

# one-vs-rest target vectors for the three iris classes
y1 = np.where(y_train == 0, 1, -1)
y2 = np.where(y_train == 1, 1, -1)
y3 = np.where(y_train == 2, 1, -1)

print('X', X_train)
print('y1', y1)
lr1.fit(X_train, y1)
lr2.fit(X_train, y2)
lr3.fit(X_train, y3)

print('weights: ', lr1.weights_)
y1Hat = lr1.net_input(X_test)
y2Hat = lr2.net_input(X_test)
y3Hat = lr3.net_input(X_test)
print('y1hat: ', y1Hat)

# pick the class whose classifier produces the largest net input
votes = [y1Hat, y2Hat, y3Hat]
max_vote = np.maximum.reduce(votes)
yHat = np.where(y1Hat == max_vote, 0, np.where(y2Hat == max_vote, 1, 2))
errorVect = np.where(yHat - y_test == 0, 0, 1)
error = np.sum(errorVect)
errorRate = error / len(y_test)
print('error rate: ', errorRate)

'''
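Two quick diagnostics that may help narrow this down (the print statements below are my own suggestion, not part of the original snippet): check which label values the classifiers are actually fed, and whether the cost logged in cost_ is shrinking or diverging over the 1000 iterations.

'''

# Hypothetical diagnostics, not from the original snippet:
# the cost computed in fit() assumes labels in {0, 1},
# so it is worth printing what lr1 actually received
print('label values fed to lr1:', np.unique(y1))
# a healthy run should show the cost decreasing, not exploding
print('cost at start / end:', lr1.cost_[0], lr1.cost_[-1])

'''

Output: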

'''

weights:  [-1702.80649129 -1635.93559503  -149.15311552 -2285.75550112
 -2286.69789   ]
y1hat:  [-6071.18319092 -5213.48250666 -2432.82402253 -5334.66292111
 -5061.83880817 -2035.67101872 -6309.46072025 -4986.09806574
 -2342.97057131 -5835.08490684 -6415.80703465 -5703.70348465
 -2500.18473396 -8021.09777309 -2495.69392957 -5952.42731292
 -6830.38517605 -6561.25933228 -2556.14184888 -2234.3445571
 -4877.54319548 -7438.09339675 -5624.12218513 -5738.31611478
 -6982.02887454 -5997.3171922  -5410.15837234 -4532.12426782
 -6082.32157053 -7359.52316176 -6102.7972787  -4439.26972552
 -1968.57807291 -7105.22886933 -5531.33878679 -2317.21152353
 -2640.4959799  -2981.13952734]

'''
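These numbers already explain why every prediction is 0: with net inputs around -6000, the clipped sigmoid in activation saturates at essentially zero, and predict thresholds net_input at 0. A quick worked check, using the first y1hat value from the output above:

'''

import numpy as np

z = -6071.18319092                              # first y1hat value printed above
p = 1. / (1. + np.exp(-np.clip(z, -250, 250)))
print(p)                                        # ~2.7e-109, effectively 0
print(np.where(z >= 0.0, 1, 0))                 # 0: the positive class is never predicted

'''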

Of course, the prediction is terrible, since the model predicts 0 every time. I'm not sure where I went wrong, but the hugely negative weights throw me off as to where the problem lies. Any ideas on how to track down the cause are appreciated.

https://stackoverflow.com/questions/67428530/logistic-regression-weights-becoming-very-large-negative-numbers May 07, 2021 at 11:09AM
