Copied!







import numpy as np 
import re

import numpy as np 
import re





Copied!







import numpy as np 
import re

import numpy as np 
import re





Copied!







text = """Natural language processing (NLP) is a subfield of linguistics, computer science, information engineering, and artificial intelligence
concerned with the interactions between computers and human (natural) languages, in particular how to program computers to process and analyze
large amounts of natural language data.
Challenges in natural language processing frequently involve speech recognition, natural language understanding,
and natural language generation. Nowadays, Artificial Intelligence is a highly trending technology and is gaining popularity among NLP developers.
Although Artificial Intelligence is a marvelous technology and has wonderful results, it is still a developing technology and its ethical use is a major concern.
"""

# Tokenize the upper-cased corpus: every maximal run of word characters is one token.
words = re.compile(r"\b\w+\b").findall(text.upper())

text = """Natural language processing (NLP) is a subfield of linguistics, computer science, information engineering, and artificial intelligence
concerned with the interactions between computers and human (natural) languages, in particular how to program computers to process and analyze
large amounts of natural language data.
Challenges in natural language processing frequently involve speech recognition, natural language understanding,
and natural language generation. Nowadays, Artificial Intelligence is a highly trending technology and is gaining popularity among NLP developers.
Although Artificial Intelligence is a marvelous technology and has wonderful results, it is still a developing technology and its ethical use is a major concern.
"""

# Tokenize the upper-cased corpus: every maximal run of word characters is one token.
words = re.compile(r"\b\w+\b").findall(text.upper())





Copied!







text = """Natural language processing (NLP) is a subfield of linguistics, computer science, information engineering, and artificial intelligence
concerned with the interactions between computers and human (natural) languages, in particular how to program computers to process and analyze
large amounts of natural language data.
Challenges in natural language processing frequently involve speech recognition, natural language understanding,
and natural language generation. Nowadays, Artificial Intelligence is a highly trending technology and is gaining popularity among NLP developers.
Although Artificial Intelligence is a marvelous technology and has wonderful results, it is still a developing technology and its ethical use is a major concern.
"""

# Tokenize the upper-cased corpus: every maximal run of word characters is one token.
words = re.compile(r"\b\w+\b").findall(text.upper())

text = """Natural language processing (NLP) is a subfield of linguistics, computer science, information engineering, and artificial intelligence
concerned with the interactions between computers and human (natural) languages, in particular how to program computers to process and analyze
large amounts of natural language data.
Challenges in natural language processing frequently involve speech recognition, natural language understanding,
and natural language generation. Nowadays, Artificial Intelligence is a highly trending technology and is gaining popularity among NLP developers.
Although Artificial Intelligence is a marvelous technology and has wonderful results, it is still a developing technology and its ethical use is a major concern.
"""

# Tokenize the upper-cased corpus: every maximal run of word characters is one token.
words = re.compile(r"\b\w+\b").findall(text.upper())





Copied!







print(words)

print(words)

['NATURAL', 'LANGUAGE', 'PROCESSING', 'NLP', 'IS', 'A', 'SUBFIELD', 'OF', 'LINGUISTICS', 'COMPUTER', 'SCIENCE', 'INFORMATION', 'ENGINEERING', 'AND', 'ARTIFICIAL', 'INTELLIGENCE', 'CONCERNED', 'WITH', 'THE', 'INTERACTIONS', 'BETWEEN', 'COMPUTERS', 'AND', 'HUMAN', 'NATURAL', 'LANGUAGES', 'IN', 'PARTICULAR', 'HOW', 'TO', 'PROGRAM', 'COMPUTERS', 'TO', 'PROCESS', 'AND', 'ANALYZE', 'LARGE', 'AMOUNTS', 'OF', 'NATURAL', 'LANGUAGE', 'DATA', 'CHALLENGES', 'IN', 'NATURAL', 'LANGUAGE', 'PROCESSING', 'FREQUENTLY', 'INVOLVE', 'SPEECH', 'RECOGNITION', 'NATURAL', 'LANGUAGE', 'UNDERSTANDING', 'AND', 'NATURAL', 'LANGUAGE', 'GENERATION', 'NOWADAYS', 'ARTIFICIAL', 'INTELLIGENCE', 'IS', 'A', 'HIGHLY', 'TRENDING', 'TECHNOLOGY', 'AND', 'IS', 'GAINING', 'POPULARITY', 'AMONG', 'NLP', 'DEVELOPERS', 'ALTHOUGH', 'ARTIFICIAL', 'INTELLIGENCE', 'IS', 'A', 'MARVELOUS', 'TECHNOLOGY', 'AND', 'HAS', 'WONDERFUL', 'RESULTS', 'IT', 'IS', 'STILL', 'A', 'DEVELOPING', 'TECHNOLOGY', 'AND', 'ITS', 'ETHICAL', 'USE', 'IS', 'A', 'MAJOR', 'CONCERN']





Copied!







print(words)

print(words)

['NATURAL', 'LANGUAGE', 'PROCESSING', 'NLP', 'IS', 'A', 'SUBFIELD', 'OF', 'LINGUISTICS', 'COMPUTER', 'SCIENCE', 'INFORMATION', 'ENGINEERING', 'AND', 'ARTIFICIAL', 'INTELLIGENCE', 'CONCERNED', 'WITH', 'THE', 'INTERACTIONS', 'BETWEEN', 'COMPUTERS', 'AND', 'HUMAN', 'NATURAL', 'LANGUAGES', 'IN', 'PARTICULAR', 'HOW', 'TO', 'PROGRAM', 'COMPUTERS', 'TO', 'PROCESS', 'AND', 'ANALYZE', 'LARGE', 'AMOUNTS', 'OF', 'NATURAL', 'LANGUAGE', 'DATA', 'CHALLENGES', 'IN', 'NATURAL', 'LANGUAGE', 'PROCESSING', 'FREQUENTLY', 'INVOLVE', 'SPEECH', 'RECOGNITION', 'NATURAL', 'LANGUAGE', 'UNDERSTANDING', 'AND', 'NATURAL', 'LANGUAGE', 'GENERATION', 'NOWADAYS', 'ARTIFICIAL', 'INTELLIGENCE', 'IS', 'A', 'HIGHLY', 'TRENDING', 'TECHNOLOGY', 'AND', 'IS', 'GAINING', 'POPULARITY', 'AMONG', 'NLP', 'DEVELOPERS', 'ALTHOUGH', 'ARTIFICIAL', 'INTELLIGENCE', 'IS', 'A', 'MARVELOUS', 'TECHNOLOGY', 'AND', 'HAS', 'WONDERFUL', 'RESULTS', 'IT', 'IS', 'STILL', 'A', 'DEVELOPING', 'TECHNOLOGY', 'AND', 'ITS', 'ETHICAL', 'USE', 'IS', 'A', 'MAJOR', 'CONCERN']





Copied!







unique_words = set(words)

# Bigram counts: model[current][following] is the number of times `following`
# appears immediately after `current` in the token list. Every distinct token
# gets an entry up front, so a token that is never followed by anything still
# maps to an empty table.
model = {token: {} for token in unique_words}

# Walk the token list as consecutive (current, following) pairs.
for current_word, next_word in zip(words, words[1:]):
    followers = model[current_word]
    followers[next_word] = followers.get(next_word, 0) + 1

unique_words = set(words)

# Bigram counts: model[current][following] is the number of times `following`
# appears immediately after `current` in the token list. Every distinct token
# gets an entry up front, so a token that is never followed by anything still
# maps to an empty table.
model = {token: {} for token in unique_words}

# Walk the token list as consecutive (current, following) pairs.
for current_word, next_word in zip(words, words[1:]):
    followers = model[current_word]
    followers[next_word] = followers.get(next_word, 0) + 1





Copied!







unique_words = set(words)

# Bigram counts: model[current][following] is the number of times `following`
# appears immediately after `current` in the token list. Every distinct token
# gets an entry up front, so a token that is never followed by anything still
# maps to an empty table.
model = {token: {} for token in unique_words}

# Walk the token list as consecutive (current, following) pairs.
for current_word, next_word in zip(words, words[1:]):
    followers = model[current_word]
    followers[next_word] = followers.get(next_word, 0) + 1

unique_words = set(words)

# Bigram counts: model[current][following] is the number of times `following`
# appears immediately after `current` in the token list. Every distinct token
# gets an entry up front, so a token that is never followed by anything still
# maps to an empty table.
model = {token: {} for token in unique_words}

# Walk the token list as consecutive (current, following) pairs.
for current_word, next_word in zip(words, words[1:]):
    followers = model[current_word]
    followers[next_word] = followers.get(next_word, 0) + 1





Copied!







# Normalize each row of the model in place: divide every entry by the row
# total so the values for one word add up to 1 (a probability distribution).
# A row with no entries is left untouched, so no division ever happens for it.
for transitions in model.values():
    total = 0
    for weight in transitions.values():
        total += weight
    for follower in transitions:
        transitions[follower] = transitions[follower] / total

# Normalize each row of the model in place: divide every entry by the row
# total so the values for one word add up to 1 (a probability distribution).
# A row with no entries is left untouched, so no division ever happens for it.
for transitions in model.values():
    total = 0
    for weight in transitions.values():
        total += weight
    for follower in transitions:
        transitions[follower] = transitions[follower] / total





Copied!







# Normalize each row of the model in place: divide every entry by the row
# total so the values for one word add up to 1 (a probability distribution).
# A row with no entries is left untouched, so no division ever happens for it.
for transitions in model.values():
    total = 0
    for weight in transitions.values():
        total += weight
    for follower in transitions:
        transitions[follower] = transitions[follower] / total

# Normalize each row of the model in place: divide every entry by the row
# total so the values for one word add up to 1 (a probability distribution).
# A row with no entries is left untouched, so no division ever happens for it.
for transitions in model.values():
    total = 0
    for weight in transitions.values():
        total += weight
    for follower in transitions:
        transitions[follower] = transitions[follower] / total





Copied!







model

model

{'NOWADAYS': {'ARTIFICIAL': 1.0},
 'ITS': {'ETHICAL': 1.0},
 'DATA': {'CHALLENGES': 1.0},
 'INVOLVE': {'SPEECH': 1.0},
 'CONCERN': {},
 'ARTIFICIAL': {'INTELLIGENCE': 1.0},
 'COMPUTERS': {'AND': 0.5, 'TO': 0.5},
 'BETWEEN': {'COMPUTERS': 1.0},
 'ANALYZE': {'LARGE': 1.0},
 'LARGE': {'AMOUNTS': 1.0},
 'GENERATION': {'NOWADAYS': 1.0},
 'HAS': {'WONDERFUL': 1.0},
 'TRENDING': {'TECHNOLOGY': 1.0},
 'DEVELOPING': {'TECHNOLOGY': 1.0},
 'ALTHOUGH': {'ARTIFICIAL': 1.0},
 'SCIENCE': {'INFORMATION': 1.0},
 'TO': {'PROGRAM': 0.5, 'PROCESS': 0.5},
 'GAINING': {'POPULARITY': 1.0},
 'HUMAN': {'NATURAL': 1.0},
 'POPULARITY': {'AMONG': 1.0},
 'MARVELOUS': {'TECHNOLOGY': 1.0},
 'PROCESSING': {'NLP': 0.5, 'FREQUENTLY': 0.5},
 'STILL': {'A': 1.0},
 'NLP': {'IS': 0.5, 'DEVELOPERS': 0.5},
 'NATURAL': {'LANGUAGE': 0.8333333333333334, 'LANGUAGES': 0.16666666666666666},
 'LINGUISTICS': {'COMPUTER': 1.0},
 'INTERACTIONS': {'BETWEEN': 1.0},
 'CHALLENGES': {'IN': 1.0},
 'IS': {'A': 0.6666666666666666,
  'GAINING': 0.16666666666666666,
  'STILL': 0.16666666666666666},
 'INFORMATION': {'ENGINEERING': 1.0},
 'MAJOR': {'CONCERN': 1.0},
 'DEVELOPERS': {'ALTHOUGH': 1.0},
 'HOW': {'TO': 1.0},
 'FREQUENTLY': {'INVOLVE': 1.0},
 'A': {'SUBFIELD': 0.2,
  'HIGHLY': 0.2,
  'MARVELOUS': 0.2,
  'DEVELOPING': 0.2,
  'MAJOR': 0.2},
 'IN': {'PARTICULAR': 0.5, 'NATURAL': 0.5},
 'AMONG': {'NLP': 1.0},
 'USE': {'IS': 1.0},
 'AND': {'ARTIFICIAL': 0.14285714285714285,
  'HUMAN': 0.14285714285714285,
  'ANALYZE': 0.14285714285714285,
  'NATURAL': 0.14285714285714285,
  'IS': 0.14285714285714285,
  'HAS': 0.14285714285714285,
  'ITS': 0.14285714285714285},
 'WONDERFUL': {'RESULTS': 1.0},
 'PARTICULAR': {'HOW': 1.0},
 'PROCESS': {'AND': 1.0},
 'SPEECH': {'RECOGNITION': 1.0},
 'RECOGNITION': {'NATURAL': 1.0},
 'CONCERNED': {'WITH': 1.0},
 'PROGRAM': {'COMPUTERS': 1.0},
 'ETHICAL': {'USE': 1.0},
 'HIGHLY': {'TRENDING': 1.0},
 'SUBFIELD': {'OF': 1.0},
 'INTELLIGENCE': {'CONCERNED': 0.3333333333333333, 'IS': 0.6666666666666666},
 'COMPUTER': {'SCIENCE': 1.0},
 'IT': {'IS': 1.0},
 'WITH': {'THE': 1.0},
 'OF': {'LINGUISTICS': 0.5, 'NATURAL': 0.5},
 'LANGUAGE': {'PROCESSING': 0.4,
  'DATA': 0.2,
  'UNDERSTANDING': 0.2,
  'GENERATION': 0.2},
 'RESULTS': {'IT': 1.0},
 'ENGINEERING': {'AND': 1.0},
 'LANGUAGES': {'IN': 1.0},
 'AMOUNTS': {'OF': 1.0},
 'TECHNOLOGY': {'AND': 1.0},
 'UNDERSTANDING': {'AND': 1.0},
 'THE': {'INTERACTIONS': 1.0}}





Copied!







model

model

{'NOWADAYS': {'ARTIFICIAL': 1.0},
 'ITS': {'ETHICAL': 1.0},
 'DATA': {'CHALLENGES': 1.0},
 'INVOLVE': {'SPEECH': 1.0},
 'CONCERN': {},
 'ARTIFICIAL': {'INTELLIGENCE': 1.0},
 'COMPUTERS': {'AND': 0.5, 'TO': 0.5},
 'BETWEEN': {'COMPUTERS': 1.0},
 'ANALYZE': {'LARGE': 1.0},
 'LARGE': {'AMOUNTS': 1.0},
 'GENERATION': {'NOWADAYS': 1.0},
 'HAS': {'WONDERFUL': 1.0},
 'TRENDING': {'TECHNOLOGY': 1.0},
 'DEVELOPING': {'TECHNOLOGY': 1.0},
 'ALTHOUGH': {'ARTIFICIAL': 1.0},
 'SCIENCE': {'INFORMATION': 1.0},
 'TO': {'PROGRAM': 0.5, 'PROCESS': 0.5},
 'GAINING': {'POPULARITY': 1.0},
 'HUMAN': {'NATURAL': 1.0},
 'POPULARITY': {'AMONG': 1.0},
 'MARVELOUS': {'TECHNOLOGY': 1.0},
 'PROCESSING': {'NLP': 0.5, 'FREQUENTLY': 0.5},
 'STILL': {'A': 1.0},
 'NLP': {'IS': 0.5, 'DEVELOPERS': 0.5},
 'NATURAL': {'LANGUAGE': 0.8333333333333334, 'LANGUAGES': 0.16666666666666666},
 'LINGUISTICS': {'COMPUTER': 1.0},
 'INTERACTIONS': {'BETWEEN': 1.0},
 'CHALLENGES': {'IN': 1.0},
 'IS': {'A': 0.6666666666666666,
  'GAINING': 0.16666666666666666,
  'STILL': 0.16666666666666666},
 'INFORMATION': {'ENGINEERING': 1.0},
 'MAJOR': {'CONCERN': 1.0},
 'DEVELOPERS': {'ALTHOUGH': 1.0},
 'HOW': {'TO': 1.0},
 'FREQUENTLY': {'INVOLVE': 1.0},
 'A': {'SUBFIELD': 0.2,
  'HIGHLY': 0.2,
  'MARVELOUS': 0.2,
  'DEVELOPING': 0.2,
  'MAJOR': 0.2},
 'IN': {'PARTICULAR': 0.5, 'NATURAL': 0.5},
 'AMONG': {'NLP': 1.0},
 'USE': {'IS': 1.0},
 'AND': {'ARTIFICIAL': 0.14285714285714285,
  'HUMAN': 0.14285714285714285,
  'ANALYZE': 0.14285714285714285,
  'NATURAL': 0.14285714285714285,
  'IS': 0.14285714285714285,
  'HAS': 0.14285714285714285,
  'ITS': 0.14285714285714285},
 'WONDERFUL': {'RESULTS': 1.0},
 'PARTICULAR': {'HOW': 1.0},
 'PROCESS': {'AND': 1.0},
 'SPEECH': {'RECOGNITION': 1.0},
 'RECOGNITION': {'NATURAL': 1.0},
 'CONCERNED': {'WITH': 1.0},
 'PROGRAM': {'COMPUTERS': 1.0},
 'ETHICAL': {'USE': 1.0},
 'HIGHLY': {'TRENDING': 1.0},
 'SUBFIELD': {'OF': 1.0},
 'INTELLIGENCE': {'CONCERNED': 0.3333333333333333, 'IS': 0.6666666666666666},
 'COMPUTER': {'SCIENCE': 1.0},
 'IT': {'IS': 1.0},
 'WITH': {'THE': 1.0},
 'OF': {'LINGUISTICS': 0.5, 'NATURAL': 0.5},
 'LANGUAGE': {'PROCESSING': 0.4,
  'DATA': 0.2,
  'UNDERSTANDING': 0.2,
  'GENERATION': 0.2},
 'RESULTS': {'IT': 1.0},
 'ENGINEERING': {'AND': 1.0},
 'LANGUAGES': {'IN': 1.0},
 'AMOUNTS': {'OF': 1.0},
 'TECHNOLOGY': {'AND': 1.0},
 'UNDERSTANDING': {'AND': 1.0},
 'THE': {'INTERACTIONS': 1.0}}





Copied!







np.random.choice(
    ["batata", "alface", "cenoura"],
    p=[0.1, 0.5, 0.4],
)

np.random.choice(
    ["batata", "alface", "cenoura"],
    p=[0.1, 0.5, 0.4],
)

np.str_('alface')





Copied!







np.random.choice(
    ["batata", "alface", "cenoura"],
    p=[0.1, 0.5, 0.4],
)

np.random.choice(
    ["batata", "alface", "cenoura"],
    p=[0.1, 0.5, 0.4],
)

np.str_('alface')





Copied!







def suggestion(model, word, fallback='A'):
    """Sample a word to follow ``word`` from a first-order Markov model.

    Args:
        model: Mapping from each known word to a dict of
            ``{next_word: probability}``.
        word: The current word; must be a key of ``model``.
        fallback: Word returned when no successor can be sampled.
            Defaults to ``'A'``.

    Returns:
        A successor of ``word`` drawn at random with the stored
        probabilities, or ``fallback`` when ``word`` has no successors or
        NumPy rejects its probabilities.

    Raises:
        Exception: If ``word`` is not a key of ``model``.
    """
    if word not in model:
        raise Exception("Word is not in model")

    transitions = model[word]
    next_words = list(transitions.keys())
    # Dead end (a word with no recorded successor): there is nothing to
    # sample from, so hand back the fallback instead of relying on NumPy
    # to raise.
    if not next_words:
        return fallback

    probs = np.array([transitions[w] for w in next_words])
    try:
        return np.random.choice(next_words, p=probs)
    except (ValueError, TypeError):
        # NumPy refuses unusable weights (e.g. probabilities that do not sum
        # to 1). Keep the best-effort fallback for those cases only, so that
        # KeyboardInterrupt and unrelated bugs are no longer swallowed.
        return fallback
    

def suggestion(model, word, fallback='A'):
    """Sample a word to follow ``word`` from a first-order Markov model.

    Args:
        model: Mapping from each known word to a dict of
            ``{next_word: probability}``.
        word: The current word; must be a key of ``model``.
        fallback: Word returned when no successor can be sampled.
            Defaults to ``'A'``.

    Returns:
        A successor of ``word`` drawn at random with the stored
        probabilities, or ``fallback`` when ``word`` has no successors or
        NumPy rejects its probabilities.

    Raises:
        Exception: If ``word`` is not a key of ``model``.
    """
    if word not in model:
        raise Exception("Word is not in model")

    transitions = model[word]
    next_words = list(transitions.keys())
    # Dead end (a word with no recorded successor): there is nothing to
    # sample from, so hand back the fallback instead of relying on NumPy
    # to raise.
    if not next_words:
        return fallback

    probs = np.array([transitions[w] for w in next_words])
    try:
        return np.random.choice(next_words, p=probs)
    except (ValueError, TypeError):
        # NumPy refuses unusable weights (e.g. probabilities that do not sum
        # to 1). Keep the best-effort fallback for those cases only, so that
        # KeyboardInterrupt and unrelated bugs are no longer swallowed.
        return fallback





Copied!







def suggestion(model, word, fallback='A'):
    """Sample a word to follow ``word`` from a first-order Markov model.

    Args:
        model: Mapping from each known word to a dict of
            ``{next_word: probability}``.
        word: The current word; must be a key of ``model``.
        fallback: Word returned when no successor can be sampled.
            Defaults to ``'A'``.

    Returns:
        A successor of ``word`` drawn at random with the stored
        probabilities, or ``fallback`` when ``word`` has no successors or
        NumPy rejects its probabilities.

    Raises:
        Exception: If ``word`` is not a key of ``model``.
    """
    if word not in model:
        raise Exception("Word is not in model")

    transitions = model[word]
    next_words = list(transitions.keys())
    # Dead end (a word with no recorded successor): there is nothing to
    # sample from, so hand back the fallback instead of relying on NumPy
    # to raise.
    if not next_words:
        return fallback

    probs = np.array([transitions[w] for w in next_words])
    try:
        return np.random.choice(next_words, p=probs)
    except (ValueError, TypeError):
        # NumPy refuses unusable weights (e.g. probabilities that do not sum
        # to 1). Keep the best-effort fallback for those cases only, so that
        # KeyboardInterrupt and unrelated bugs are no longer swallowed.
        return fallback
    

def suggestion(model, word, fallback='A'):
    """Sample a word to follow ``word`` from a first-order Markov model.

    Args:
        model: Mapping from each known word to a dict of
            ``{next_word: probability}``.
        word: The current word; must be a key of ``model``.
        fallback: Word returned when no successor can be sampled.
            Defaults to ``'A'``.

    Returns:
        A successor of ``word`` drawn at random with the stored
        probabilities, or ``fallback`` when ``word`` has no successors or
        NumPy rejects its probabilities.

    Raises:
        Exception: If ``word`` is not a key of ``model``.
    """
    if word not in model:
        raise Exception("Word is not in model")

    transitions = model[word]
    next_words = list(transitions.keys())
    # Dead end (a word with no recorded successor): there is nothing to
    # sample from, so hand back the fallback instead of relying on NumPy
    # to raise.
    if not next_words:
        return fallback

    probs = np.array([transitions[w] for w in next_words])
    try:
        return np.random.choice(next_words, p=probs)
    except (ValueError, TypeError):
        # NumPy refuses unusable weights (e.g. probabilities that do not sum
        # to 1). Keep the best-effort fallback for those cases only, so that
        # KeyboardInterrupt and unrelated bugs are no longer swallowed.
        return fallback





Copied!





Copied!





Copied!







# Generate a 21-word chain: the seed word "A" followed by 20 sampled words,
# each one sampled from the successors of the word before it.
w = "A"
print(w)

wordlist = [w]
for _ in range(20):
    w = suggestion(model, w)
    wordlist.append(w)

print(' '.join(wordlist))
    

# Generate a 21-word chain: the seed word "A" followed by 20 sampled words,
# each one sampled from the successors of the word before it.
w = "A"
print(w)

wordlist = [w]
for _ in range(20):
    w = suggestion(model, w)
    wordlist.append(w)

print(' '.join(wordlist))

A
A HIGHLY TRENDING TECHNOLOGY AND IS STILL A MAJOR CONCERN A MAJOR CONCERN A SUBFIELD OF LINGUISTICS COMPUTER SCIENCE INFORMATION ENGINEERING





Copied!







# Generate a 21-word chain: the seed word "A" followed by 20 sampled words,
# each one sampled from the successors of the word before it.
w = "A"
print(w)

wordlist = [w]
for _ in range(20):
    w = suggestion(model, w)
    wordlist.append(w)

print(' '.join(wordlist))
    

# Generate a 21-word chain: the seed word "A" followed by 20 sampled words,
# each one sampled from the successors of the word before it.
w = "A"
print(w)

wordlist = [w]
for _ in range(20):
    w = suggestion(model, w)
    wordlist.append(w)

print(' '.join(wordlist))

A
A HIGHLY TRENDING TECHNOLOGY AND IS STILL A MAJOR CONCERN A MAJOR CONCERN A SUBFIELD OF LINGUISTICS COMPUTER SCIENCE INFORMATION ENGINEERING





Copied!







# Baseline for comparison: 20 independent draws from the token list, ignoring
# word order entirely. Tokens that occur more often in `words` are drawn more
# often because the list keeps duplicates.
wordlist = [np.random.choice(words) for _ in range(20)]

print(' '.join(wordlist))
    

# Baseline for comparison: 20 independent draws from the token list, ignoring
# word order entirely. Tokens that occur more often in `words` are drawn more
# often because the list keeps duplicates.
wordlist = [np.random.choice(words) for _ in range(20)]

print(' '.join(wordlist))

INTELLIGENCE LANGUAGE POPULARITY DATA IS NATURAL NATURAL DEVELOPERS NLP ARTIFICIAL ETHICAL NLP BETWEEN A NATURAL AND ITS IN MAJOR IS





Copied!







# Baseline for comparison: 20 independent draws from the token list, ignoring
# word order entirely. Tokens that occur more often in `words` are drawn more
# often because the list keeps duplicates.
wordlist = [np.random.choice(words) for _ in range(20)]

print(' '.join(wordlist))
    

# Baseline for comparison: 20 independent draws from the token list, ignoring
# word order entirely. Tokens that occur more often in `words` are drawn more
# often because the list keeps duplicates.
wordlist = [np.random.choice(words) for _ in range(20)]

print(' '.join(wordlist))

INTELLIGENCE LANGUAGE POPULARITY DATA IS NATURAL NATURAL DEVELOPERS NLP ARTIFICIAL ETHICAL NLP BETWEEN A NATURAL AND ITS IN MAJOR IS





Copied!





Copied!

Keys	Action
`?`	Open this help
`n`	Next page
`p`	Previous page
`s`	Search

Exercise 01¶

2¶

3¶