Kanishk Chinnapapannagari, Aarav Naveen, Avyay Potarlanka, and Melvin Rajendran
In today’s evolving financial landscape, both investors and traders are constantly seeking an edge to make informed decisions. The stock market, which contains an intricate web of variables and is influenced by numerous factors, has proven to be a difficult environment to navigate.
In the past, investment-related decisions were often made based on analysis of historical trends. However, the advancement of data science and machine learning techniques has introduced a new opportunity to potentially predict future stock prices with reasonable accuracy and thus gain valuable insights.
This data science project delves into prediction of stock prices within the Standard & Poor’s 500 index, otherwise known as the S&P 500. This index contains 500 of the top companies in the United States, and it represents approximately 80% of the U.S. stock market’s total value. Hence, it serves as a strong indicator of the movement within the market. To learn more about the S&P 500 and other popular indices in the U.S., read this article: https://www.investopedia.com/insights/introduction-to-stock-market-indices/.
Throughout this project, we will follow a comprehensive data science pipeline: data collection, data processing and cleaning, exploratory analysis and visualization, machine learning modeling, and interpretation of results.
Our project aims to leverage predictive modeling techniques to provide insights to investors. The analysis herein aims to identify stocks that appear undervalued and are therefore likely to increase in price in the near future, meaning investors should consider buying or holding shares. Likewise, it aims to identify stocks that appear overvalued and are likely to soon decrease in price, indicating that investors should consider selling their position.
# Import necessary libraries
from bs4 import BeautifulSoup
from keras.layers import Dense, LSTM
from keras.models import Sequential
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import requests
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
To gather information about the S&P 500 companies, we will be using the following dataset: https://www.kaggle.com/datasets/paultimothymooney/stock-market-data. This Kaggle dataset contains the date, volume, and prices for the NASDAQ, NYSE, and S&P 500. For the purposes of this project, we will only analyze the stock prices of companies in the S&P 500.
# Initialize an empty data frame to store the stock price data
price_data = pd.DataFrame()
# Initialize the path to the folder containing the data
folder_path = 'sp500-data'
# Iterate across each file in the folder by name
for file_name in os.listdir(folder_path):
    # Check if the current file is a CSV file
    if file_name.endswith('.csv'):
        # Read the current file into a temporary data frame
        temp = pd.read_csv(os.path.join(folder_path, file_name))
        # Extract the symbol from the current file's name
        symbol = file_name[0:-4]
        # Store the symbol in a new column in the temporary data frame
        temp['Symbol'] = symbol
        # Concatenate the accumulating and temporary data frames
        price_data = pd.concat([price_data, temp], ignore_index = True)
# Print the last five rows of the price data frame
price_data.tail()
| Date | Low | Open | Volume | High | Close | Adjusted Close | Symbol |
---|---|---|---|---|---|---|---|---|
3265995 | 06-12-2022 | 152.089996 | 154.220001 | 1964800.0 | 155.500000 | 153.050003 | 153.050003 | ZTS |
3265996 | 07-12-2022 | 149.380005 | 152.960007 | 2444100.0 | 153.789993 | 150.250000 | 150.250000 | ZTS |
3265997 | 08-12-2022 | 149.199997 | 150.529999 | 2267500.0 | 154.350006 | 153.679993 | 153.679993 | ZTS |
3265998 | 09-12-2022 | 152.740005 | 153.940002 | 3274900.0 | 156.330002 | 153.389999 | 153.389999 | ZTS |
3265999 | 12-12-2022 | 152.970001 | 154.070007 | 301135.0 | 154.470001 | 153.625000 | 153.625000 | ZTS |
We noticed that the Kaggle dataset does not contain sector data. For this reason, we will supplement our existing data with the list of S&P 500 companies on the following webpage: https://en.wikipedia.org/wiki/List_of_S%26P_500_companies. By scraping this list, we can match each company in our existing data to its corresponding GICS sector and sub-industry. This will enable us to perform analysis by sector and/or sub-industry and thus reduce bias in our modeling.
# Headers for the HTTP request
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
    'From': 'pleaseletmein@gmail.com'
}
# Make an HTTP request to the Wikipedia URL and store the response
response = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies', headers = headers)
# Parse the text from the webpage as HTML
soup = BeautifulSoup(response.text, 'html.parser')
# Find the table element containing the data and both extract and store the data
table = soup.find('table')
# Read the HTML table into a data frame
sector_data = pd.read_html(str(table), flavor = 'html5lib')[0]
# Print the last five rows of the sector data frame
sector_data.tail()
| Symbol | Security | GICS Sector | GICS Sub-Industry | Headquarters Location | Date added | CIK | Founded |
---|---|---|---|---|---|---|---|---|
498 | YUM | Yum! Brands | Consumer Discretionary | Restaurants | Louisville, Kentucky | 1997-10-06 | 1041061 | 1997 |
499 | ZBRA | Zebra Technologies | Information Technology | Electronic Equipment & Instruments | Lincolnshire, Illinois | 2019-12-23 | 877212 | 1969 |
500 | ZBH | Zimmer Biomet | Health Care | Health Care Equipment | Warsaw, Indiana | 2001-08-07 | 1136869 | 1927 |
501 | ZION | Zions Bancorporation | Financials | Regional Banks | Salt Lake City, Utah | 2001-06-22 | 109380 | 1873 |
502 | ZTS | Zoetis | Health Care | Pharmaceuticals | Parsippany, New Jersey | 2013-06-21 | 1555280 | 1952 |
We would also like to focus our attention on the top companies of each sector, as these companies drive the movement within their respective sectors. Hence, we will scrape the data from the following webpage: https://www.slickcharts.com/sp500. This webpage contains a list of the S&P 500 companies by weight, where weight is equal to a company's market cap divided by the overall value of the S&P 500. Ultimately, we will select the top companies of each sector by weight.
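To make this definition concrete, here is a small illustrative calculation using made-up market caps (the numbers below are hypothetical, not real data):
# Hypothetical market caps (in billions of dollars) for a toy three-company index
market_caps = {'AAA': 2000.0, 'BBB': 500.0, 'CCC': 100.0}
# The index's overall value is the sum of its constituents' market caps
total_value = sum(market_caps.values())
# Each company's weight is its market cap divided by the overall value
weights = {symbol: cap / total_value for symbol, cap in market_caps.items()}
print(weights)  # {'AAA': 0.769..., 'BBB': 0.192..., 'CCC': 0.038...}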
# Make an HTTP request to the Slickcharts URL and store the response
response = requests.get('https://www.slickcharts.com/sp500', headers = headers)
# Parse the text from the webpage as HTML
soup = BeautifulSoup(response.text, 'html.parser')
# Find the table element containing the data and both extract and store the data
table = soup.find('table')
# Read the HTML table into a data frame
weight_data = pd.read_html(str(table), flavor = 'html5lib')[0]
# Print the last five rows of the weight data frame
weight_data.tail()
| # | Company | Symbol | Weight | Price | Chg | % Chg |
---|---|---|---|---|---|---|---|
498 | 499 | Newell Brands Inc | NWL | 0.010325 | 9.27 | -0.23 | (-2.46%) |
499 | 500 | Zions Bancorporation N.A. | ZION | 0.009851 | 22.47 | -0.21 | (-0.93%) |
500 | 501 | Lincoln National Corp | LNC | 0.008901 | 19.26 | -0.68 | (-3.42%) |
501 | 502 | News Corporation Class B | NWS | 0.005934 | 18.99 | 2.16 | (12.80%) |
502 | 503 | DISH Network Corporation Class A | DISH | 0.004431 | 6.15 | -0.08 | (-1.28%) |
At this point, we have three data frames containing data that was collected in the previous step. We will merge this data into a single data frame. Then, we will filter our data to include only the top five companies within each sector. As part of this process, we need to clean our data. Data cleaning will involve casting our data to the proper types, removing entries with missing values, and removing unnecessary columns.
# Rename the sector and industry-related columns
sector_data = sector_data.rename(columns = {'GICS Sector': 'Sector', 'GICS Sub-Industry': 'Industry'})
# Drop unnecessary columns
sector_data = sector_data.drop(['Headquarters Location', 'Date added', 'CIK', 'Founded'], axis = 1)
# Print the last five rows of the data frame
sector_data.tail()
| Symbol | Security | Sector | Industry |
---|---|---|---|---|
498 | YUM | Yum! Brands | Consumer Discretionary | Restaurants |
499 | ZBRA | Zebra Technologies | Information Technology | Electronic Equipment & Instruments |
500 | ZBH | Zimmer Biomet | Health Care | Health Care Equipment |
501 | ZION | Zions Bancorporation | Financials | Regional Banks |
502 | ZTS | Zoetis | Health Care | Pharmaceuticals |
# Drop all columns except Symbol and Weight
weight_data = weight_data.drop(['#', 'Company', 'Price', 'Chg', '% Chg'], axis = 1)
# Print the last five rows of the data frame
weight_data.tail()
| Symbol | Weight |
---|---|---|
498 | NWL | 0.010325 |
499 | ZION | 0.009851 |
500 | LNC | 0.008901 |
501 | NWS | 0.005934 |
502 | DISH | 0.004431 |
# Perform an inner join (merge) on all three data frames to create a single data frame
data = pd.merge(pd.merge(price_data, sector_data, on = 'Symbol'), weight_data, on = 'Symbol')
# Reindex the columns of the data frame
data = data.reindex(columns = ['Symbol', 'Security', 'Sector', 'Industry', 'Weight', 'Date', 'Open', 'High', 'Low', 'Close', 'Adjusted Close', 'Volume'])
# Cast the Date column's type to datetime
data['Date'] = pd.to_datetime(data['Date'], dayfirst = True)
# Print the last five rows of the resulting data frame
data.tail()
| Symbol | Security | Sector | Industry | Weight | Date | Open | High | Low | Close | Adjusted Close | Volume |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2890656 | ZTS | Zoetis | Health Care | Pharmaceuticals | 0.249449 | 2022-12-06 | 154.220001 | 155.500000 | 152.089996 | 153.050003 | 153.050003 | 1964800.0 |
2890657 | ZTS | Zoetis | Health Care | Pharmaceuticals | 0.249449 | 2022-12-07 | 152.960007 | 153.789993 | 149.380005 | 150.250000 | 150.250000 | 2444100.0 |
2890658 | ZTS | Zoetis | Health Care | Pharmaceuticals | 0.249449 | 2022-12-08 | 150.529999 | 154.350006 | 149.199997 | 153.679993 | 153.679993 | 2267500.0 |
2890659 | ZTS | Zoetis | Health Care | Pharmaceuticals | 0.249449 | 2022-12-09 | 153.940002 | 156.330002 | 152.740005 | 153.389999 | 153.389999 | 3274900.0 |
2890660 | ZTS | Zoetis | Health Care | Pharmaceuticals | 0.249449 | 2022-12-12 | 154.070007 | 154.470001 | 152.970001 | 153.625000 | 153.625000 | 301135.0 |
# Initialize an empty data frame to contain the filtered data
top_data = pd.DataFrame()
# Iterate across a list of the unique sectors
for sector in data['Sector'].unique():
    # Filter the data by the current sector
    sector_data = data[data['Sector'] == sector]
    # Compile a list of the top five weights in the current sector
    top_five_weights = sorted(sector_data['Weight'].unique(), reverse = True)[:5]
    # Filter the data by the top five weights
    sector_data = sector_data[sector_data['Weight'].isin(top_five_weights)]
    # Concatenate the top five companies' data into the accumulating data frame
    top_data = pd.concat([top_data, sector_data], ignore_index = True)
# Print the last five rows of the filtered data frame
top_data.tail()
| Symbol | Security | Sector | Industry | Weight | Date | Open | High | Low | Close | Adjusted Close | Volume |
---|---|---|---|---|---|---|---|---|---|---|---|---|
518500 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-06 | 36.990002 | 37.070000 | 36.630001 | 36.889999 | 36.889999 | 26293700.0 |
518501 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-07 | 36.740002 | 37.310001 | 36.669998 | 37.169998 | 37.169998 | 23065900.0 |
518502 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-08 | 37.110001 | 37.240002 | 36.869999 | 37.099998 | 37.099998 | 19549100.0 |
518503 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-09 | 37.209999 | 37.630001 | 36.959999 | 37.400002 | 37.400002 | 20669100.0 |
518504 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-12 | 37.689999 | 37.730000 | 37.279999 | 37.615002 | 37.615002 | 4698435.0 |
Before we fit a machine learning model to our data, we would like to visualize it by sector and preliminarily determine relationships between the data. In particular, we would like to analyze how strongly the stock prices of companies within the same sector are correlated.
For the remainder of our analysis, we will focus our attention on adjusted close price, which is explained in the following section.
Adjusted close price is the final price at which a security trades at the end of a trading day, adjusted for dividends, stock splits, and new offerings. It is the most accurate representation of a company's stock price, and it is commonly used by investors and traders to track performance.
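To make the adjustment concrete, here is a small illustrative example (with made-up numbers) of how a 2-for-1 stock split is reflected in adjusted close prices:
import pandas as pd

# Hypothetical raw close prices; a 2-for-1 split occurs before day 3, halving the share price
raw_close = pd.Series([100.0, 102.0, 51.5, 52.0])
# Divide the pre-split prices by the split ratio so the whole series is comparable
split_ratio = pd.Series([2.0, 2.0, 1.0, 1.0])
adjusted_close = raw_close / split_ratio
print(adjusted_close.tolist())  # [50.0, 51.0, 51.5, 52.0] - no artificial 50% "drop"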
# Generate a plot for the top five companies in each sector
for sector in top_data['Sector'].unique():
    # Filter the data for the current sector
    sector_data = top_data[top_data['Sector'] == sector]
    # Reshape the data for plotting purposes
    sec_as_row = sector_data.pivot(index = 'Date', columns = 'Symbol', values = 'Adjusted Close')
    # Generate the plot
    sec_as_row.plot(title = f'{sector}: Adjusted Close Price vs. Date', legend = True, xlabel = 'Date', ylabel = 'Adjusted Close Price', figsize = (10, 5))
Above are 11 line plots of adjusted close price vs. date for the top five companies (by weight) in each sector.
In the Health Care sector, one company had a much higher adjusted close price while the other four tracked one another closely. This pattern, in which the top company trades significantly higher while the remaining four cluster together at lower prices, appears across several of these graphs, including the Financials, Consumer Staples, and Industrials sectors. Interestingly, in the Industrials sector, as the top company's adjusted close price began to fall, the other four companies' prices rose together rather than a single company taking over and continuing the trend. Other sectors show closer adjusted close prices among the top five companies. In the Information Technology sector, for example, ACN, MSFT, and CRM follow similar growth trends and maintain similar prices over the years, while CSCO and AAPL trail behind. Likewise, in the Energy sector, MPC, XOM, COP, and EOG essentially follow the same trend and stock price while SLB is consistently lower, so within this sector four companies are roughly equally competitive rather than one company dominating as in other sectors.
Volume traded is the number of shares that change hands during the trading day. This is an important metric for investors and traders to consider.
# Generate a plot for the top five companies in each sector
for sector in top_data['Sector'].unique():
    # Filter the data for the current sector
    sector_data = top_data[top_data['Sector'] == sector]
    # Reshape the data for plotting purposes
    sec_as_row = sector_data.pivot(index = 'Date', columns = 'Symbol', values = 'Volume')
    # Generate the plot
    sec_as_row.plot(title = f'{sector}: Volume vs. Date', legend = True, xlabel = 'Date', ylabel = 'Volume', figsize = (10, 5))
Above are 11 line plots of volume traded vs. date for the top five companies (by weight) in each sector.
It is apparent that while there are correlations among the companies within each sector, one company often dominates the volume traded or exhibits strong, isolated shifts. For example, in the Consumer Discretionary sector, Amazon (ticker AMZN) has had the greatest volume traded since approximately 2000, reaching roughly 2 billion shares traded at certain points. Similarly, in the Financials sector, Bank of America (ticker BAC) has had the greatest volume traded since approximately 2010, reaching roughly 1 billion shares traded at certain points, whereas its competitors have only reached about 600 million shares.
A moving average smooths the price of a stock by converting it to a continually updated average price, calculated over a predetermined time period. Two commonly used windows for calculating a moving average are 10 days and 20 days.
# Lengths of moving averages (in days) to calculate
moving_averages = [10, 20]
# Iterate across the moving average lengths
for ma in moving_averages:
    # Compute each company's moving average separately so that a rolling window
    # never spans the boundary between two different companies
    top_data[f'{ma}-Day Moving Average'] = top_data.groupby('Security')['Adjusted Close'].transform(lambda prices: prices.rolling(ma).mean())
# Print the last five rows of the data frame
top_data.tail()
| Symbol | Security | Sector | Industry | Weight | Date | Open | High | Low | Close | Adjusted Close | Volume | 10-Day Moving Average | 20-Day Moving Average |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
518500 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-06 | 36.990002 | 37.070000 | 36.630001 | 36.889999 | 36.889999 | 26293700.0 | 38.3170 | 38.23500 |
518501 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-07 | 36.740002 | 37.310001 | 36.669998 | 37.169998 | 37.169998 | 23065900.0 | 38.1140 | 38.20000 |
518502 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-08 | 37.110001 | 37.240002 | 36.869999 | 37.099998 | 37.099998 | 19549100.0 | 37.9320 | 38.17400 |
518503 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-09 | 37.209999 | 37.630001 | 36.959999 | 37.400002 | 37.400002 | 20669100.0 | 37.7700 | 38.11800 |
518504 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-12 | 37.689999 | 37.730000 | 37.279999 | 37.615002 | 37.615002 | 4698435.0 | 37.7075 | 38.08375 |
Daily return is the percentage change in the price of stock over the course of a trading day. This will help us assess the risk of investing in a particular company.
# Initialize an empty data frame to contain the daily return values
return_data = pd.DataFrame()
# Iterate across the companies
for security in top_data['Security'].unique():
    # Filter the data for the current security
    security_data = top_data[top_data['Security'] == security]
    # Calculate the percent change, i.e., the daily return
    security_rets = pd.DataFrame(security_data['Adjusted Close'].pct_change())
    # Append this data to the accumulating data frame
    return_data = pd.concat([return_data, security_rets], ignore_index = True)
# Add the daily return values to the top company data frame
top_data['Daily Return'] = return_data
# Print the last five rows of the top data frame
top_data.tail()
| Symbol | Security | Sector | Industry | Weight | Date | Open | High | Low | Close | Adjusted Close | Volume | 10-Day Moving Average | 20-Day Moving Average | Daily Return |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
518500 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-06 | 36.990002 | 37.070000 | 36.630001 | 36.889999 | 36.889999 | 26293700.0 | 38.3170 | 38.23500 | -0.004856 |
518501 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-07 | 36.740002 | 37.310001 | 36.669998 | 37.169998 | 37.169998 | 23065900.0 | 38.1140 | 38.20000 | 0.007590 |
518502 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-08 | 37.110001 | 37.240002 | 36.869999 | 37.099998 | 37.099998 | 19549100.0 | 37.9320 | 38.17400 | -0.001883 |
518503 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-09 | 37.209999 | 37.630001 | 36.959999 | 37.400002 | 37.400002 | 20669100.0 | 37.7700 | 38.11800 | 0.008086 |
518504 | VZ | Verizon | Communication Services | Integrated Telecommunication Services | 0.457305 | 2022-12-12 | 37.689999 | 37.730000 | 37.279999 | 37.615002 | 37.615002 | 4698435.0 | 37.7075 | 38.08375 | 0.005749 |
Next, we will plot the daily returns of various stocks against one another. This will help us assess whether the stock prices of companies in the same sector are strongly correlated or not. We expect that they are linearly and positively correlated.
For the purposes of this project, we will only show the plot for the Information Technology sector. This will avoid long, repetitive outputs.
# Initialize a data frame to contain the formatted data for plotting
formatted_data = top_data[['Symbol', 'Date', 'Daily Return']]
# Pivot the ticker symbols from a column's entries to column headers
formatted_data = formatted_data.pivot(index = 'Date', columns = 'Symbol', values = 'Daily Return')
# Print the last five rows of the formatted data frame
formatted_data.tail()
Date | AAPL | ABBV | ACN | AEP | AMT | AMZN | APD | BA | BAC | BLK | ... | SLB | SO | SRE | T | TMO | UNP | UPS | V | VZ | XOM
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
2022-12-06 | -0.025370 | -0.001342 | -0.025039 | 0.019573 | -0.014331 | -0.030326 | -0.009112 | -0.036035 | -0.042646 | 0.003404 | ... | -0.006347 | 0.014819 | 0.010141 | 0.022400 | -0.011988 | 0.000379 | -0.033451 | -0.021527 | -0.004856 | -0.027796 |
2022-12-07 | -0.013785 | 0.010261 | 0.004485 | 0.003113 | -0.006635 | 0.002380 | 0.011150 | -0.010817 | -0.007879 | -0.001591 | ... | -0.021226 | 0.000292 | -0.017720 | 0.006781 | 0.013823 | 0.005115 | -0.002456 | -0.006074 | 0.007590 | -0.002214 |
2022-12-08 | 0.012133 | 0.003567 | 0.019045 | 0.010862 | 0.005400 | 0.021366 | 0.013562 | 0.014618 | -0.009163 | -0.008066 | ... | 0.002410 | 0.002628 | -0.001970 | -0.009326 | 0.017576 | 0.003817 | 0.028396 | 0.006208 | -0.001883 | 0.007429 |
2022-12-09 | -0.003435 | -0.017652 | -0.012802 | -0.011666 | 0.007491 | -0.013946 | -0.017039 | 0.002569 | -0.001849 | 0.004990 | ... | -0.059094 | -0.004659 | -0.005861 | -0.001569 | -0.013593 | -0.007886 | -0.011078 | -0.001913 | 0.008086 | -0.008427 |
2022-12-12 | 0.000563 | 0.002112 | 0.010090 | 0.004608 | 0.001356 | -0.006566 | 0.003037 | 0.038265 | 0.005405 | -0.003105 | ... | 0.023206 | 0.025819 | 0.009246 | 0.005500 | 0.010951 | 0.004353 | 0.019815 | 0.022377 | 0.005749 | 0.017288 |
5 rows × 55 columns
# Pair plot the comparisons of daily returns for all companies in the Information Technology sector
sns.pairplot(formatted_data[['AAPL', 'ACN', 'CRM', 'CSCO', 'MSFT']], kind = 'reg')
The above plot is a pair plot of the daily returns for the top five companies (by weight) in the Information Technology sector. It shows a somewhat linear, positive correlation between the companies, with some outlying cases: in general, as one company's daily return increases, the daily returns of the other companies increase as well. Hence, we can proceed with our modeling.
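To complement the visual check, we can quantify these relationships by computing the pairwise correlation coefficients of the daily returns directly (a quick sketch using the formatted_data frame defined above):
# Pearson correlation matrix of daily returns for the Information Technology leaders
formatted_data[['AAPL', 'ACN', 'CRM', 'CSCO', 'MSFT']].corr()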
We will be implementing the Sequential machine learning model from the Keras library. A Sequential model stacks several layers that sequentially feed into each other during training: each layer performs a unique computation on its input and passes its output to the next layer.
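As a minimal illustration of this layered structure (a generic sketch, not the architecture we train below):
from keras.models import Sequential
from keras.layers import Dense

# A toy Sequential model: each layer's output feeds directly into the next layer
demo_model = Sequential()
demo_model.add(Dense(16, activation = 'relu', input_shape = (8,)))  # hidden layer on 8 input features
demo_model.add(Dense(1))  # output layer producing a single value
demo_model.summary()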
At this point in our analysis, we will focus our attention on a company that is driving a lot of movement within the S&P 500: Google. Google is an industry leader in the Communication Services sector and its respective industry. This decision was made by our team because of computational limitations with regard to training the following machine learning model. This process is both time-consuming and resource-intensive. Note that this modeling could be applied to any company in the S&P 500.
# Filter the data to only contain the date, adjusted close, and symbol
goog_data = top_data.filter(['Date', 'Adjusted Close', 'Symbol'])
# Filter the data to only contain the data for Google
goog_data = goog_data[goog_data['Symbol'] == 'GOOG']
# Drop the symbol column
goog_data = goog_data.drop(columns = ['Symbol'])
# Set date to the data frame's index
goog_data.set_index('Date', inplace = True)
# Print the first five rows of the data frame
goog_data.head()
Date | Adjusted Close |
---|---|
2004-08-19 | 2.499133 |
2004-08-20 | 2.697639 |
2004-08-23 | 2.724787 |
2004-08-24 | 2.611960 |
2004-08-25 | 2.640104 |
Our goal is to predict the future stock prices of Google (ticker GOOG). In order to do so, we need to train and fit a model that can then estimate stock prices in the immediate future. We will use an LSTM (Long Short-Term Memory) model designed by Fares Sayah on Kaggle, modified to work with our data.
Thanks to Fares Sayah for his documentation of this particular model! To read more: https://www.kaggle.com/code/faressayah/stock-market-analysis-prediction-using-lstm/notebook.
We will use 95% of our data to train our model.
# Select the values to be trained
pre_train = goog_data.values
# Calculate the length of the training data
training_data_len = int(np.ceil(len(pre_train) * .95))
# Print this length
training_data_len
4382
We need to scale the data to the range (0, 1) so that all of the values are on a similar scale, which helps our machine learning model train more accurately. We can then prepare the training data to be used by our model.
# Create a scaler
scaler = MinMaxScaler(feature_range = (0, 1))
# Create scaled data
scaled_data = scaler.fit_transform(pre_train)
# Print the scaled data
scaled_data
array([[5.54601395e-05], [1.39474256e-03], [1.57790515e-03], ..., [6.17057513e-01], [6.11120335e-01], [6.07105972e-01]])
The machine learning model used here is the Sequential model from the Keras library. This model is appropriate when there is precisely one input and one output, which holds in our case: the model processes a window of past stock prices in order to make a single prediction for a single point in time.
# Total training data
train_data = scaled_data[0:int(training_data_len), :]
x_train = []
y_train = []
# Split the training data into sliding windows: each sample (x) holds 60
# consecutive days of scaled prices and its label (y) is the next day's price
for i in range(60, len(train_data)):
    x_train.append(train_data[i-60:i, 0])
    y_train.append(train_data[i, 0])
    # Print the first two windows to illustrate their structure
    if i <= 61:
        print(x_train)
        print(y_train)
        print()
# Convert the x_train and y_train to numpy arrays
x_train, y_train = np.array(x_train), np.array(y_train)
# Reshape the data
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# Build the LSTM model
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape= (x_train.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
model.fit(x_train, y_train, batch_size=1, epochs=1)
[array([5.54601395e-05, 1.39474256e-03, 1.57790515e-03, 8.16681841e-04, 1.00656502e-03, 1.32752381e-03, 1.03177125e-03, 3.36085743e-04, 3.96579086e-04, 4.03331846e-05, 2.52062297e-04, 0.00000000e+00, 2.63827349e-04, 3.84818860e-04, 3.86498202e-04, 8.93979873e-04, 1.25862571e-03, 1.92911142e-03, 2.01480938e-03, 2.34584744e-03, 2.93735417e-03, 3.25158754e-03, 2.99616656e-03, 3.08690416e-03, 3.49692925e-03, 3.33056652e-03, 3.06674561e-03, 4.51189259e-03, 5.22102410e-03, 4.97232210e-03, 5.47308479e-03, 5.88982081e-03, 6.44603560e-03, 6.22926685e-03, 6.52669393e-03, 6.33849170e-03, 5.92343341e-03, 6.28303960e-03, 6.87117960e-03, 7.05602153e-03, 7.41058809e-03, 8.25919514e-03, 8.05418662e-03, 6.80228150e-03, 8.29616160e-03, 1.21695001e-02, 1.46850625e-02, 1.37440385e-02, 1.44447685e-02, 1.56765022e-02, 1.52295171e-02, 1.61352524e-02, 1.59403296e-02, 1.54025956e-02, 1.42313536e-02, 1.16519311e-02, 1.21896587e-02, 1.15427095e-02, 1.14015514e-02, 1.39490471e-02])] [0.013777644707187047] [array([5.54601395e-05, 1.39474256e-03, 1.57790515e-03, 8.16681841e-04, 1.00656502e-03, 1.32752381e-03, 1.03177125e-03, 3.36085743e-04, 3.96579086e-04, 4.03331846e-05, 2.52062297e-04, 0.00000000e+00, 2.63827349e-04, 3.84818860e-04, 3.86498202e-04, 8.93979873e-04, 1.25862571e-03, 1.92911142e-03, 2.01480938e-03, 2.34584744e-03, 2.93735417e-03, 3.25158754e-03, 2.99616656e-03, 3.08690416e-03, 3.49692925e-03, 3.33056652e-03, 3.06674561e-03, 4.51189259e-03, 5.22102410e-03, 4.97232210e-03, 5.47308479e-03, 5.88982081e-03, 6.44603560e-03, 6.22926685e-03, 6.52669393e-03, 6.33849170e-03, 5.92343341e-03, 6.28303960e-03, 6.87117960e-03, 7.05602153e-03, 7.41058809e-03, 8.25919514e-03, 8.05418662e-03, 6.80228150e-03, 8.29616160e-03, 1.21695001e-02, 1.46850625e-02, 1.37440385e-02, 1.44447685e-02, 1.56765022e-02, 1.52295171e-02, 1.61352524e-02, 1.59403296e-02, 1.54025956e-02, 1.42313536e-02, 1.16519311e-02, 1.21896587e-02, 1.15427095e-02, 1.14015514e-02, 1.39490471e-02]), array([1.39474256e-03, 1.57790515e-03, 8.16681841e-04, 1.00656502e-03, 1.32752381e-03, 1.03177125e-03, 3.36085743e-04, 3.96579086e-04, 4.03331846e-05, 2.52062297e-04, 0.00000000e+00, 2.63827349e-04, 3.84818860e-04, 3.86498202e-04, 8.93979873e-04, 1.25862571e-03, 1.92911142e-03, 2.01480938e-03, 2.34584744e-03, 2.93735417e-03, 3.25158754e-03, 2.99616656e-03, 3.08690416e-03, 3.49692925e-03, 3.33056652e-03, 3.06674561e-03, 4.51189259e-03, 5.22102410e-03, 4.97232210e-03, 5.47308479e-03, 5.88982081e-03, 6.44603560e-03, 6.22926685e-03, 6.52669393e-03, 6.33849170e-03, 5.92343341e-03, 6.28303960e-03, 6.87117960e-03, 7.05602153e-03, 7.41058809e-03, 8.25919514e-03, 8.05418662e-03, 6.80228150e-03, 8.29616160e-03, 1.21695001e-02, 1.46850625e-02, 1.37440385e-02, 1.44447685e-02, 1.56765022e-02, 1.52295171e-02, 1.61352524e-02, 1.59403296e-02, 1.54025956e-02, 1.42313536e-02, 1.16519311e-02, 1.21896587e-02, 1.15427095e-02, 1.14015514e-02, 1.39490471e-02, 1.37776447e-02])] [0.013777644707187047, 0.014259921756710325]
4322/4322 [==============================] - 183s 41ms/step - loss: 8.2505e-04
# Create the test data set: the final 5% of the scaled values, plus the 60 preceding days needed to form the first window
test_data = scaled_data[training_data_len - 60: , :]
# Create the data sets x_test and y_test
x_test = []
y_test = pre_train[training_data_len:, :]
for i in range(60, len(test_data)):
    x_test.append(test_data[i-60:i, 0])
# Convert the data to a numpy array
x_test = np.array(x_test)
# Reshape the data
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))
# Get the model's predicted price values
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
# Get the root mean squared error (RMSE)
rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))
rmse
8/8 [==============================] - 2s 43ms/step
4.739062835437262
# Calculate range of stock price in order to contextualize RMSE
min_price = min(goog_data['Adjusted Close'])
max_price = max(goog_data['Adjusted Close'])
print(f'The GOOG stock price ranges from {min_price} to {max_price}')
The GOOG stock price ranges from 2.490912914276123 to 150.70899963378906
Root mean squared error (RMSE) is a statistic used to evaluate the accuracy of the model's predictions; it measures the typical size of the prediction errors in the same units as the data. An RMSE of roughly 4.74 is small relative to GOOG's roughly $148 price range, suggesting the predictions track the actual prices reasonably well.
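One simple way to put the RMSE in context (a small sketch using the variables computed above) is to normalize it by the observed price range:
# Express the RMSE as a fraction of the observed price range
normalized_rmse = rmse / (max_price - min_price)
print(f'Normalized RMSE: {normalized_rmse:.2%}')  # roughly 3.2% given the values above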
# Split Google's data into the training and validation segments for plotting
train = top_data[top_data['Symbol'] == 'GOOG'][:training_data_len]
valid = top_data[top_data['Symbol'] == 'GOOG'][training_data_len:].copy()
# Attach the model's predictions to the validation segment
valid['Predictions'] = predictions
# Visualize the data
plt.figure(figsize = (16,6))
plt.title('GOOG Share Price Predictions')
plt.xlabel('Date', fontsize = 18)
plt.ylabel('Close Price USD ($)', fontsize = 18)
plt.plot(train['Date'], train['Close'])
plt.plot(valid['Date'], valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc = 'lower right')
plt.show()
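Although it was not part of the original analysis, the fitted model could also produce a true out-of-sample forecast. Below is a minimal sketch that feeds the most recent 60 days of scaled prices back into the model to estimate the next trading day's adjusted close, assuming model, scaler, and scaled_data are defined as above:
# Use the most recent 60 days of scaled prices as a single input window
last_60_days = scaled_data[-60:].reshape(1, 60, 1)
# Predict the next day's scaled price and map it back to dollars
next_day_scaled = model.predict(last_60_days)
next_day_price = scaler.inverse_transform(next_day_scaled)
print(f'Predicted next-day adjusted close for GOOG: ${next_day_price[0][0]:.2f}')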
In general, this project involved a large amount of data cleaning and processing, pushing us to learn how to optimally organize information so that trends and patterns in the data become visually understandable rather than being lost in the sheer quantity of data provided. The collection process was also extensive: we learned that all the necessary data is not always found in a single source, so we had to combine several sources to ensure the necessary information was present. Regarding the predictive algorithm, we learned about selecting the right model and the extensive process of transforming data into the format required by that machine learning model. Applying the model, seeing the predictions, and understanding the accuracy of those predictions was truly exciting.
Our model has proven to be moderately accurate in predicting stock prices, as shown by the demonstration on GOOG. However, given the high-stakes nature of stock trading, predictions are held to a significantly higher standard, so while our model is reasonably good at predicting stock prices, it is not yet something that can be applied to real-world trading.
The finance industry is a foundation of modern capitalism, and as technology continues to develop rapidly, data science is used to make predictions that optimize stock trades beyond the capabilities of humans, allowing the industry to further develop and flourish. Therefore, data science and the concepts discussed throughout this project have powerful real-world applications, directly impacting the way our world runs today and develops in the future.