I am trying to use a code snippet on logistic regression from Python Machine Learning, 3rd ed., but my model's weights become very large negative numbers, so every prediction is the same (class 0). I assume the error is in my data-preparation code but cannot figure out what is going on. What could be causing my model's weights to become so exceedingly large?
Logistic Regression:
'''
import numpy as np

class LogisticRegression():
    def __init__(self, learning_rate, n_iter, random_state):
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        randomGenerator = np.random.RandomState(self.random_state)
        self.weights_ = randomGenerator.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            net_input = self.net_input(X)
            output = self.activation(net_input)
            errors = (y - output)
            # weights
            self.weights_[1:] += self.learning_rate * X.T.dot(errors)
            # bias
            self.weights_[0] += self.learning_rate * errors.sum()
            # compute logistic cost function
            cost = (-y.dot(np.log(output)) - ((1 - y).dot(np.log(1 - output))))
            self.cost_.append(cost)
        return self

    def net_input(self, X):
        return np.dot(X, self.weights_[1:]) + self.weights_[0]

    def activation(self, z):
        return 1. / (1. + np.exp(-np.clip(z, -250, 250)))

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, 0)
'''
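For what it's worth, here is how I have been watching the cost per epoch to confirm that training diverges (my own debugging snippet, not from the book; it reuses `X_train` and `y1` from the preparation code below):

'''
import matplotlib.pyplot as plt

# cost_ stores the logistic cost after each epoch; if training
# diverges, the curve grows instead of shrinking toward zero.
lr_debug = LogisticRegression(learning_rate=.05, n_iter=1000, random_state=7)
lr_debug.fit(X_train, y1)

plt.plot(range(1, len(lr_debug.cost_) + 1), lr_debug.cost_)
plt.xlabel('Epoch')
plt.ylabel('Logistic cost')
plt.show()
'''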
Data Preparation:
'''
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np

X, y = load_iris(return_X_y=True)
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=7)

lr1 = LogisticRegression(learning_rate=.05, n_iter=1000, random_state=7)
lr2 = LogisticRegression(learning_rate=.05, n_iter=1000, random_state=7)
lr3 = LogisticRegression(learning_rate=.05, n_iter=1000, random_state=7)

# one-vs-rest relabelling of the three iris classes
y1 = np.where(y_train == 0, 1, -1)
y2 = np.where(y_train == 1, 1, -1)
y3 = np.where(y_train == 2, 1, -1)

print('X', X_train)
print('y1', y1)

lr1.fit(X_train, y1)
lr2.fit(X_train, y2)
lr3.fit(X_train, y3)
print('weights: ', lr1.weights_)

y1Hat = lr1.net_input(X_test)
y2Hat = lr2.net_input(X_test)
y3Hat = lr3.net_input(X_test)
print('y1hat: ', y1Hat)

# pick the class whose classifier produces the largest net input
votes = [y1Hat, y2Hat, y3Hat]
max_vote = np.maximum.reduce(votes)
yHat = np.where(y1Hat == max_vote, 0, np.where(y2Hat == max_vote, 1, 2))

errorVect = np.where(yHat - y_test == 0, 0, 1)
error = np.sum(errorVect)
errorRate = error / len(y_test)
print('error rate: ', errorRate)
'''
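To rule out a scaling problem, I also sanity-check the transformed features and the relabelled targets (again my own check, nothing from the book):

'''
# after MinMaxScaler every feature should lie in [0, 1]
print('feature range:', X_train.min(axis=0), X_train.max(axis=0))

# the one-vs-rest targets are encoded as 1 / -1
print('y1 classes:', np.unique(y1, return_counts=True))
'''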
Output:
'''
weights:  [-1702.80649129 -1635.93559503  -149.15311552 -2285.75550112
 -2286.69789   ]
y1hat:  [-6071.18319092 -5213.48250666 -2432.82402253 -5334.66292111
 -5061.83880817 -2035.67101872 -6309.46072025 -4986.09806574
 -2342.97057131 -5835.08490684 -6415.80703465 -5703.70348465
 -2500.18473396 -8021.09777309 -2495.69392957 -5952.42731292
 -6830.38517605 -6561.25933228 -2556.14184888 -2234.3445571
 -4877.54319548 -7438.09339675 -5624.12218513 -5738.31611478
 -6982.02887454 -5997.3171922  -5410.15837234 -4532.12426782
 -6082.32157053 -7359.52316176 -6102.7972787  -4439.26972552
 -1968.57807291 -7105.22886933 -5531.33878679 -2317.21152353
 -2640.4959799  -2981.13952734]
'''
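As I read it, with weights this negative every net input on the test set is hugely negative, so `predict` (which checks `net_input >= 0`) always returns 0 and the sigmoid saturates at 0. A quick illustration using two of the net-input values from above:

'''
import numpy as np

z = np.array([-6071.18319092, -5213.48250666])    # sample net inputs
sigmoid = 1. / (1. + np.exp(-np.clip(z, -250, 250)))
print(sigmoid)                     # effectively 0.0 for both
print(np.where(z >= 0.0, 1, 0))    # [0 0] -> class 0 every time
'''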
Of course, the prediction is terrible, since it predicts class 0 every time. I'm not sure where I went wrong, but the uniformly negative weights really throw me off as to where the problem lies. Any ideas on how to track down the cause are appreciated.