Copied!







x = ["a happy happy phrase",
     "another super happy phrase",
     "a sad phrase",
     "an unhappy phrase"]

y = [1, 1, 0, 0]

x = ["a happy happy phrase",
     "another super happy phrase",
     "a sad phrase",
     "an unhappy phrase"]

y = [1, 1, 0, 0]





Copied!







x = ["a happy happy phrase",
     "another super happy phrase",
     "a sad phrase",
     "an unhappy phrase"]

y = [1, 1, 0, 0]

x = ["a happy happy phrase",
     "another super happy phrase",
     "a sad phrase",
     "an unhappy phrase"]

y = [1, 1, 0, 0]





Copied!







from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import numpy as np 

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import numpy as np





Copied!







from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import numpy as np 

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import numpy as np





Copied!







countvectorizer = CountVectorizer(binary=True)
countvectorizer.fit(x)
x_vect = countvectorizer.transform(x)
print(x_vect.toarray())

countvectorizer = CountVectorizer(binary=True)
countvectorizer.fit(x)
x_vect = countvectorizer.transform(x)
print(x_vect.toarray())

[[0 0 1 1 0 0 0]
 [0 1 1 1 0 1 0]
 [0 0 0 1 1 0 0]
 [1 0 0 1 0 0 1]]





Copied!







countvectorizer = CountVectorizer(binary=True)
countvectorizer.fit(x)
x_vect = countvectorizer.transform(x)
print(x_vect.toarray())

countvectorizer = CountVectorizer(binary=True)
countvectorizer.fit(x)
x_vect = countvectorizer.transform(x)
print(x_vect.toarray())

[[0 0 1 1 0 0 0]
 [0 1 1 1 0 1 0]
 [0 0 0 1 1 0 0]
 [1 0 0 1 0 0 1]]





Copied!







countvectorizer.vocabulary_

countvectorizer.vocabulary_

{'happy': 2,
 'phrase': 3,
 'another': 1,
 'super': 5,
 'sad': 4,
 'an': 0,
 'unhappy': 6}





Copied!







countvectorizer.vocabulary_

countvectorizer.vocabulary_

{'happy': 2,
 'phrase': 3,
 'another': 1,
 'super': 5,
 'sad': 4,
 'an': 0,
 'unhappy': 6}





Copied!





Copied!





Copied!







from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()

clf.fit(x_vect, y)

from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()

clf.fit(x_vect, y)

LogisticRegression()





Copied!







from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()

clf.fit(x_vect, y)

from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()

clf.fit(x_vect, y)

LogisticRegression()





Copied!







clf.coef_

clf.coef_

array([[-3.03694267e-01,  2.71750141e-01,  6.63081739e-01,
         1.46448395e-04, -3.59241023e-01,  2.71750141e-01,
        -3.03694267e-01]])





Copied!







clf.coef_

clf.coef_

array([[-3.03694267e-01,  2.71750141e-01,  6.63081739e-01,
         1.46448395e-04, -3.59241023e-01,  2.71750141e-01,
        -3.03694267e-01]])





Copied!







y_pred = clf.predict(x_vect)
print(y_pred, y)

y_pred = clf.predict(x_vect)
print(y_pred, y)

[1 1 0 0] [1, 1, 0, 0]





Copied!







y_pred = clf.predict(x_vect)
print(y_pred, y)

y_pred = clf.predict(x_vect)
print(y_pred, y)

[1 1 0 0] [1, 1, 0, 0]





Copied!







x_test = [
    "today I feel so happy",
    "joy dwells in my heart",
    "today I can only feel the darkness",
    "I feel a bit unhappy about menial things of life"
]

x_vect_test = countvectorizer.transform(x_test)
y_pred_test = clf.predict(x_vect_test)
print(y_pred_test)

x_test = [
    "today I feel so happy",
    "joy dwells in my heart",
    "today I can only feel the darkness",
    "I feel a bit unhappy about menial things of life"
]

x_vect_test = countvectorizer.transform(x_test)
y_pred_test = clf.predict(x_vect_test)
print(y_pred_test)

[1 0 0 0]





Copied!







x_test = [
    "today I feel so happy",
    "joy dwells in my heart",
    "today I can only feel the darkness",
    "I feel a bit unhappy about menial things of life"
]

x_vect_test = countvectorizer.transform(x_test)
y_pred_test = clf.predict(x_vect_test)
print(y_pred_test)

x_test = [
    "today I feel so happy",
    "joy dwells in my heart",
    "today I can only feel the darkness",
    "I feel a bit unhappy about menial things of life"
]

x_vect_test = countvectorizer.transform(x_test)
y_pred_test = clf.predict(x_vect_test)
print(y_pred_test)

[1 0 0 0]





Copied!







clf.predict_proba(x_vect_test)

clf.predict_proba(x_vect_test)

array([[0.39122204, 0.60877796],
       [0.55500236, 0.44499764],
       [0.55500236, 0.44499764],
       [0.62822222, 0.37177778]])





Copied!







clf.predict_proba(x_vect_test)

clf.predict_proba(x_vect_test)

array([[0.39122204, 0.60877796],
       [0.55500236, 0.44499764],
       [0.55500236, 0.44499764],
       [0.62822222, 0.37177778]])





Copied!







z = clf.decision_function(x_vect_test)
print(z)

z = clf.decision_function(x_vect_test)
print(z)

[ 0.44217834 -0.2209034  -0.2209034  -0.52459766]





Copied!







z = clf.decision_function(x_vect_test)
print(z)

z = clf.decision_function(x_vect_test)
print(z)

[ 0.44217834 -0.2209034  -0.2209034  -0.52459766]





Copied!







1/(1+np.exp(-z))

1/(1+np.exp(-z))

array([0.60877796, 0.44499764, 0.44499764, 0.37177778])





Copied!







1/(1+np.exp(-z))

1/(1+np.exp(-z))

array([0.60877796, 0.44499764, 0.44499764, 0.37177778])

	penalty	'l2'
	dual	False
	tol	0.0001
	C	1.0
	fit_intercept	True
	intercept_scaling	1
	class_weight	None
	random_state	None
	solver	'lbfgs'
	max_iter	100
	multi_class	'deprecated'
	verbose	0
	warm_start	False
	n_jobs	None
	l1_ratio	None

Keys	Action
`?`	Open this help
`n`	Next page
`p`	Previous page
`s`	Search

Sklearn's code with a toy example

Our dataset

The Vectorizer

Logistic Regression