# Multiple linear regression on the 50_Startups dataset.
#
# Loads the CSV, one-hot encodes the categorical feature at column index 3,
# drops one dummy column, fits a LinearRegression on an 80/20 train/test
# split and prints the R^2 score obtained on the held-out test set.

# Importing the libraries
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Importing the dataset
dataset = pd.read_csv('../datasets/50_Startups.csv')
# All rows, first four columns are the features X
X = dataset.iloc[:, :4].values
# All rows, fifth column is the target Y
Y = dataset.iloc[:, 4].values

# Encoding categorical data
# OneHotEncoder's `categorical_features` argument was removed from
# scikit-learn, so the column selection is done by ColumnTransformer instead.
# OneHotEncoder accepts string categories directly and orders them sorted,
# so no separate LabelEncoder pass is needed.
onehotencoder = OneHotEncoder()
column_transformer = ColumnTransformer(
    transformers=[('onehot', onehotencoder, [3])],
    # Keep the untouched columns; they are appended to the right of the
    # dummy columns, so the dummies occupy the leading columns.
    remainder='passthrough',
    # Always return a dense array rather than a sparse matrix.
    sparse_threshold=0,
)
# X starts as an object array (mixed numbers and strings); cast the encoded
# result to float so the regressor receives a purely numeric matrix.
X = np.asarray(column_transformer.fit_transform(X), dtype=float)

# Avoiding Dummy Variable Trap: drop the first dummy column
X = X[:, 1:]

# Splitting the dataset into the Training set and Test set
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# Fitting Multiple Linear Regression to the Training set
regressor = LinearRegression()
regressor.fit(X_train, Y_train)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Regression evaluation
print(r2_score(Y_test, y_pred))