import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import seaborn as sns


# Load the tab-separated review file. quoting=3 is csv.QUOTE_NONE, so quote
# characters inside a review are read as ordinary text.
df = pd.read_csv('Restaurant-Reviews.tsv', sep='\t', quoting=3)


# Peek at the first rows.
df.head()


# Peek at the last rows.
df.tail()


# Count missing values per column.
df.isna().sum()

Review    0
Liked     0
dtype: int64


# Dataset size, followed by the number of rows labelled as liked (Liked == 1).
total_reviews = len(df)
liked_reviews = len(df[df['Liked'] == 1])
print(total_reviews)
print(liked_reviews)

1000
500


import re
import nltk


# Download the NLTK stop-word corpus that the cleaning steps below rely on.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\yandiher\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!

True


from nltk.corpus import stopwords


# Show the first raw review before any cleaning is applied.
df['Review'][0]

'Wow... Loved this place.'


# Replace every non-letter with a space, then lower-case and tokenise on whitespace.
letters_only = re.sub('[^a-zA-Z]', ' ', df['Review'][0])
review = letters_only.lower().split()


# Tokens that survive English stop-word removal, printed next to the unfiltered tokens.
preview = [token for token in review if token not in stopwords.words('english')]
print(preview)
print(review)

['wow', 'loved', 'place']
['wow', 'loved', 'this', 'place']


from nltk.stem import PorterStemmer

pStemmer = PorterStemmer()


# Drop stop words, reduce each remaining token to its stem, then rebuild a
# single space-separated string and display it.
kept_tokens = [token for token in review if token not in stopwords.words(fileids='english')]
stemmed_tokens = [pStemmer.stem(word=token) for token in kept_tokens]
review = ' '.join(stemmed_tokens)
review

'wow love place'


# Build the cleaned corpus: one normalised string per review, in dataframe order.
#
# Each review is reduced to letters only, lower-cased, split on whitespace,
# stripped of English stop words, stemmed, and re-joined with single spaces.
#
# NOTE(review): stop-word removal also drops negations (the sample printed
# below loses "didn't"), which throws away sentiment cues -- consider keeping
# negation words if model quality matters.
non_letters = re.compile('[^a-zA-Z]')

# Load the stop words once into a set instead of re-reading the word list for
# every single token; membership tests on a set are also constant time.
english_stopwords = set(stopwords.words(fileids='english'))

corpus = []
# Iterate over the column values directly so the loop does not depend on the
# dataframe carrying a default 0..n-1 index.
for raw_review in df['Review']:
    tokens = non_letters.sub(' ', raw_review).lower().split()
    stems = [pStemmer.stem(token) for token in tokens if token not in english_stopwords]
    corpus.append(' '.join(stems))


# Compare one cleaned entry with its original text.
print(corpus[6])
print(df['Review'].iloc[6])

honeslti tast fresh
Honeslty it didn't taste THAT fresh.)


from sklearn.feature_extraction.text import CountVectorizer
# Learn the bag-of-words vocabulary from the cleaned corpus.
# NOTE(review): the vocabulary is fitted on the full corpus before the
# train/test split further down, so test-set words are part of the vocabulary.
vectorizer = CountVectorizer()
vectorizer.fit(corpus)

CountVectorizer()

CountVectorizer()


# Dense document-term count matrix (one row per review) and the matching labels.
x = vectorizer.transform(corpus).toarray()
y = df['Liked'].values


from sklearn.model_selection import train_test_split


# 80/20 split, stratified on the label so both parts keep the class balance.
xTrain, xTest, yTrain, yTest = train_test_split(x, y, train_size=0.8, random_state=42, stratify=y)


from sklearn.neural_network import MLPClassifier
# Seed the network as well: the split above is already seeded, but an unseeded
# MLPClassifier draws different initial weights on every run, so the reported
# metrics would not be reproducible.
model = MLPClassifier(random_state=42)
model.fit(xTrain, yTrain)
prediction = model.predict(xTest)


from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay


# Overall accuracy first, then the per-class precision / recall / F1 table.
test_accuracy = accuracy_score(y_true=yTest, y_pred=prediction)
print('the accuracy of the model is ', test_accuracy)
test_report = classification_report(y_true=yTest, y_pred=prediction)
print(test_report)

the accuracy of the model is  0.77
              precision    recall  f1-score   support

           0       0.79      0.73      0.76       100
           1       0.75      0.81      0.78       100

    accuracy                           0.77       200
   macro avg       0.77      0.77      0.77       200
weighted avg       0.77      0.77      0.77       200


# Draw the confusion matrix for the held-out predictions and render the figure.
ConfusionMatrixDisplay.from_predictions(yTest, prediction)
plt.show()


def pred(text):
    """Print the predicted sentiment for one or more restaurant reviews.

    Parameters
    ----------
    text : str or iterable of str
        A single review, or a collection of reviews. A bare string is
        treated as one review.

    Returns
    -------
    None
        One message is printed per review, in input order.
    """
    reviews = [text] if isinstance(text, str) else list(text)
    if not reviews:
        # Nothing to classify; the model cannot predict on zero samples.
        return

    # Apply the same cleaning used to build the training corpus (letters only,
    # lower-case, stop words removed, stemmed). The vectorizer vocabulary holds
    # stems, so raw words would otherwise miss their features.
    english_stopwords = set(stopwords.words('english'))
    cleaned = []
    for raw_review in reviews:
        tokens = re.sub('[^a-zA-Z]', ' ', raw_review).lower().split()
        stems = [pStemmer.stem(token) for token in tokens if token not in english_stopwords]
        cleaned.append(' '.join(stems))

    features = vectorizer.transform(cleaned).toarray()
    labels = model.predict(features)

    # Branch on the model output itself. The previous code compared the
    # unrelated global `test`, so the negative message was always printed.
    for label in labels:
        if label == 1:
            print('the customer(s) love our restaurant.')
        else:
            print('the customer(s) do not like our restaurant.')


# A single sample review, wrapped in a list, passed to pred().
test = ['how is it possible that there was a cockroach inside my meal?']


pred(test)

the customer(s) do not like our restaurant.

	Review	Liked
0	Wow... Loved this place.	1
1	Crust is not good.	0
2	Not tasty and the texture was just nasty.	0
3	Stopped by during the late May bank holiday of...	1
4	The selection on the menu was great and so wer...	1

	Review	Liked
995	I think food should have flavor and texture an...	0
996	Appetite instantly gone.	0
997	Overall I was not impressed and would not go b...	0
998	The whole experience was underwhelming, and I ...	0
999	Then, as if I hadn't wasted enough of my life ...	0

import necessary libraries

load dataset

exploratory data analysis

data cleaning

bag-of-words model

split data

build algorithm

evaluation