# LightGBM baseline
#
# Trains a 10-class LightGBM model on the digit-recognizer training CSV,
# holding out 10% of the rows as a validation set for early stopping, then
# writes the arg-max class prediction for every test row to submission.csv.
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split

df = pd.read_csv('../input/digit-recognizer/train.csv')

# Feature columns: pixel0 .. pixel783.
col = [f'pixel{i}' for i in range(784)]

lgb_params = {
    "objective": "multiclass",
    "metric": "multi_logloss",
    "num_class": 10,
    "max_depth": 5,
    "num_leaves": 15,
    "learning_rate": 0.1,
    "bagging_fraction": 1.0,
    "feature_fraction": 1.0,
    "lambda_l1": 0.0,
    "lambda_l2": 0.0,
}

# Fixed seed so the hold-out split (and therefore the early-stopping round and
# the resulting submission) is reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    df[col], df['label'], test_size=0.1, random_state=42
)

lgtrain = lgb.Dataset(X_train, label=Y_train)
# `reference` makes the validation set reuse the training set's feature binning.
lgtest = lgb.Dataset(X_test, label=Y_test, reference=lgtrain)

# The `early_stopping_rounds` / `verbose_eval` keyword arguments of lgb.train
# were removed in LightGBM 4.0; the callbacks below are the supported
# replacement (stop after 5 rounds without improvement, log every 10 rounds).
lgb_clf = lgb.train(
    lgb_params,
    lgtrain,
    num_boost_round=500,
    valid_sets=[lgtrain, lgtest],
    callbacks=[
        lgb.early_stopping(stopping_rounds=5),
        lgb.log_evaluation(period=10),
    ],
)

# `df` is deliberately re-bound below (test data, then the submission template)
# to keep the same module-level names as the original script.
df = pd.read_csv('../input/digit-recognizer/test.csv')
# predict() yields one score per class for each row; take the highest-scoring
# class index as the predicted label.
res = lgb_clf.predict(df[col]).argmax(axis=1)

df = pd.read_csv('../input/digit-recognizer/sample_submission.csv')
df['Label'] = res
df.to_csv('submission.csv', index=False)

## Ref - https://www.kaggle.com/tanreinama/lightgbm-baseline