import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
import statsmodels.api as sm


def display(w=16, h=4):
    """Open a new matplotlib figure of ``w`` by ``h`` at 150 dpi.

    Args:
        w: Figure width passed to ``figsize``.
        h: Figure height passed to ``figsize``.

    Returns nothing; the figure is created through ``plt.figure`` so the
    plotting calls that follow this helper draw onto it.
    """
    size = (w, h)
    plt.figure(figsize=size, dpi=150)


# Load the raw data; the relative path is resolved against the working directory.
dataset = pd.read_csv('dataset_label.csv')
# Preview the first rows (the value is only rendered when run as a notebook cell).
dataset.head()


# Keep only the four columns the rest of the analysis works with.
df = dataset.loc[:, ['Class', 'Age', 'Score', 'Level']]


# Summary statistics, transposed so that each variable becomes one row.
describe = df.describe().transpose()

# A short (h=1) figure is enough for the few rows of the summary table.
display(h=1)
sns.heatmap(data=describe, cmap='viridis', annot=True)
plt.title('Description of The Data')
# Keep the row labels horizontal so they stay readable.
plt.yticks(rotation=0)
plt.show()


# One KDE plot per numeric variable, each with its own title, drawn in the
# same order as before: Age, Score, Level.
for column, title in (
    ('Age', 'Skewness of Age'),
    ('Score', 'Skewness of English Score'),
    ('Level', 'Skewness of Level of Class'),
):
    # displot is figure-level, so it creates its own figure (no display() call).
    sns.displot(data=df, x=column, kind='kde', height=3, aspect=(5 / 1))
    plt.title(title)
    plt.show()


# Bar chart of Age, one bar per row of df (the row index is used as the x axis).
display(h=6)
sns.barplot(data=df, x=df.index, y='Age')
plt.title('Age Each Students')
plt.xlabel('Students')
plt.ylabel('Age')
plt.show()


# Bar chart of Score, one bar per row of df.
display(h=6)
sns.barplot(data=df, x=df.index, y='Score')
# Title typo fixed: it previously read 'Respondenst'.
plt.title('Score Each Respondents')
plt.xlabel('Respondents')
plt.ylabel('Score')
plt.show()


# Fixed left-to-right order of the class names on the x axis of the box plots.
order_list = ['Diligent', 'Empathy', 'Integrity', 'Responsible', 'Creativity', 'Tolerance', 'Honesty', 'Enthusiastic']


# Distribution of Age within each class.
# The frame is passed as data=df (not positionally) so the call matches the
# other seaborn calls in this file and also works on seaborn releases that
# do not accept a positional data argument.
display()
sns.boxplot(data=df, x='Class', y='Age', order=order_list)
plt.title('Boxplot Age Variable')
plt.show()


# Distribution of Score within each class.
display()
sns.boxplot(data=df, x='Class', y='Score', order=order_list)
plt.title('Boxplot English Score Variable')
plt.show()


# Pairwise correlation of the three numeric variables, rounded to 3 decimals.
corr = df.loc[:, ['Age', 'Score', 'Level']].corr().round(decimals=3)

display()
sns.heatmap(data=corr, annot=True)
# Keep the row labels horizontal so they stay readable.
plt.yticks(rotation=0)
plt.title('Correlation Between Age, English Score, and Level Class')
plt.show()


# Scatter of Score against Age with seaborn's fitted regression line on top.
display(h=6)
sns.regplot(x='Age', y='Score', data=df)
plt.title('Regression Line Variable Age VS Score')
plt.show()


# Response: the Level column.
y = df['Level']


# Predictors: Age and Score as a plain array with an intercept column added.
# The array carries no column names, so the labels for the summary table are
# supplied separately, constant first.
x = sm.add_constant(df[['Age', 'Score']].to_numpy())
variable = ['CONST', 'Age', 'Score']


# Fit ordinary least squares of Level on (const, Age, Score) and print the report.
model = sm.OLS(y, x).fit()
print(model.summary(xname=variable))

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  Level   R-squared:                       0.781
Model:                            OLS   Adj. R-squared:                  0.773
Method:                 Least Squares   F-statistic:                     98.20
Date:                Wed, 21 Jun 2023   Prob (F-statistic):           7.08e-19
Time:                        22:19:28   Log-Likelihood:                -81.012
No. Observations:                  58   AIC:                             168.0
Df Residuals:                      55   BIC:                             174.2
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
CONST         -2.2198      0.578     -3.839      0.000      -3.379      -1.061
Age            0.4112      0.057      7.158      0.000       0.296       0.526
Score          0.1684      0.049      3.445      0.001       0.070       0.266
==============================================================================
Omnibus:                        0.864   Durbin-Watson:                   2.472
Prob(Omnibus):                  0.649   Jarque-Bera (JB):                0.957
Skew:                          -0.242   Prob(JB):                        0.620
Kurtosis:                       2.597   Cond. No.                         63.1
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


# Percentage of variance NOT accounted for by the model, from adjusted R-squared.
# The value is scaled to a percentage first and rounded by the format spec, so
# float artefacts (e.g. 28.299999999999997 from rounding before multiplying by
# 100) cannot leak into the message, and no NumPy-only .round() method is needed.
print(f'the {(1 - model.rsquared_adj) * 100:.1f} % can be explained by variables I do not research, such as confident level and comprehension.')

the 22.7 % can be explained by variables I do not research, such as confident level and comprehension.

BACKGROUND

YAYASAN CIPTA MANDIRI

YCM CLASSES

IDENTIFICATION OF PROBLEM

INSTRUMENTS OF RESEARCH

SCOPE AND LIMITATION OF PROBLEM

THE EFFECT OF AGE AND ENGLISH SCORE ON LEVEL CLASS IN YAYASAN CIPTA MANDIRI

LOAD LIBRARY AND DATA

EXPLORATORY DATA

ANALYSIS DATA

CONCLUSION AND IMPROVEMENT

CLOSING

	Class	Name	Age	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	Score	Level
0	Diligent	nauffal nur rizky	17	1	1	1	1	1	1	1	1	1	0	1	1	1	1	1	1	15	8
1	Diligent	muhammad amarif puja adiria	18	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	14	8
2	Tolerance	adela dzakira aftani	10	0	0	0	0	0	0	0	0	0	1	0	0	0	0	0	0	1	3
3	Honesty	aretha deandra yusuf	9	0	0	0	0	0	0	0	0	1	1	0	0	0	0	0	0	2	2
4	Honesty	najla sabria rachmat	11	1	0	0	0	0	0	0	1	0	1	0	0	0	0	0	0	3	2