import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from itables import show
import os
import glob
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
tqdm.pandas()
# Collect the classified tweet exports in a deterministic (sorted) order.
files = sorted(glob.glob("classified/nz_tweets*.csv"))
print(files)

# Show full cell contents instead of truncating long tweet texts.
# `None` is the supported "no limit" value; the former `-1` sentinel was
# deprecated in pandas 1.0 and is rejected by later releases.
pd.set_option('display.max_colwidth', None)
%%time
def _read_tweets(path):
    """Load one semicolon-separated tweet export into a DataFrame."""
    return pd.read_csv(path, sep=';', lineterminator='\n')


# Stack all exports into a single frame; the frames are produced lazily so no
# extra reference to the per-file frames is kept around afterwards.
df = pd.concat(map(_read_tweets, files))
print(len(df))
%%time
# Parse the timestamp column and mirror it into the index so that the frame
# (and any column taken from it) can be resampled by time.
df["timestamp"] = pd.to_datetime(df["timestamp"])
df.index = df["timestamp"]

# Number of tweets per calendar day; the selection below lists the days for
# which no tweet was recorded.
daycounts = df["timestamp"].resample("D").count()
daycounts.loc[daycounts == 0]
df.columns
# Monthly tweet volume: all tweets first, then one line per sentiment bucket
# of the "compound" score. The lines are drawn in the same order as the
# legend labels listed below.
stamps = df["timestamp"]
score = df["compound"]

stamps.resample("M").count().plot(figsize=(15,15), title="NZ twitter", ylim=(0,3e5))

sentiment_buckets = (
    (score > 0.05, {"color": 'g'}),        # positive
    (score < -0.05, {"color": 'r'}),       # negative
    (score.between(-.05, .05), {}),        # neutral, default line colour
)
for bucket_mask, line_style in sentiment_buckets:
    stamps[bucket_mask].resample("M").count().plot(**line_style)

plt.legend(["All tweets", "Positive tweets", "Negative tweets", "Neutral tweets"])
plt.ylabel("# of tweets per month")
plt.show()
# Average "compound" score of all tweets, aggregated per calendar month.
monthly_mean_sentiment = df["compound"].resample("M").mean()
monthly_mean_sentiment.plot(figsize=(20,20), title="Mean sentiment per month")
# Monthly number of tweets containing the whole word "hot" (red line) versus
# the whole word "cold" (blue line). Missing texts count as non-matches.
mentions_hot = df["text"].str.contains(r"\bhot\b", na=False)
mentions_cold = df["text"].str.contains(r"\bcold\b", na=False)

hot_per_month = df["timestamp"][mentions_hot].resample("M").count()
cold_per_month = df["timestamp"][mentions_cold].resample("M").count()

hot_per_month.plot(figsize=(15, 15), title="NZ hot vs cold tweets", color="r", ylim=(0,1000))
cold_per_month.plot(color="b")
plt.ylabel("# of tweets per month")
plt.show()
# Case-insensitive keyword alternatives used to pick out climate/environment
# related tweets; `.?` tolerates one optional separator character between words.
pattern = r"global.?warming|climate.?chang|sea.?level.?ris|rising.?sea.?level|climate.?crisis|climate.?action|extreme.?weather|biodiversity|IPCC|Paris.?accord"

# Work on an independent copy so that columns added later do not touch `df`.
is_environmental = df["text"].str.contains(pattern, na=False, case=False)
environmental_tweets = df.loc[is_environmental].copy()
print(len(environmental_tweets))
# Wrap every keyword match in <b>…</b> so it stands out in the HTML table.
# `regex=True` must be explicit: since pandas 2.0 `Series.str.replace` defaults
# to literal matching, which raises ValueError for a callable replacement.
# NOTE(review): the tweet text is inserted into HTML unescaped — confirm the
# source data is already HTML-safe before rendering it in a shared notebook.
environmental_tweets["text_with_emoji_formatted"] = environmental_tweets["text_with_emoji"].str.replace(
    pattern,
    lambda match: "<b>" + match.group() + "</b>",
    case=False,
    regex=True,
)
# Row callback handed to the table as JavaScript source: rows whose value at
# position 2 is greater than zero get a green tint, all other rows a red one.
# NOTE(review): position 2 is presumably the "compound" column (after the
# index and the formatted text) — confirm against the rendered table.
row_tint_callback = """function( row, data, dataIndex ) {
if (data[2] > 0) {
$(row).css('background-color', 'rgba(0,255,0,.2)');
} else {
$(row).css('background-color', 'rgba(255,0,0,.2)');
}
}"""

# Interactive table of the highlighted tweets, initially ordered descending
# on column position 2.
table_columns = ["text_with_emoji_formatted", "compound"]
show(
    environmental_tweets[table_columns],
    order=[[2, 'desc']],
    orderClasses=False,
    createdRow=row_tint_callback,
)
# Average "compound" score of the keyword-matched tweets per calendar month.
environmental_monthly_mean = environmental_tweets["compound"].resample("M").mean()
environmental_monthly_mean.plot(figsize=(20,20), title="Mean sentiment towards climate change per month")
# Monthly volume of the keyword-matched tweets: all of them first, then one
# line per sentiment bucket of the "compound" score. The lines are drawn in
# the same order as the legend labels listed below.
env_stamps = environmental_tweets["timestamp"]
env_score = environmental_tweets["compound"]

env_stamps.resample("M").count().plot(figsize=(15,15), title="NZ twitter", ylim=(0, 120))

env_buckets = (
    (env_score > 0.05, {"color": 'g'}),        # positive
    (env_score < -0.05, {"color": 'r'}),       # negative
    (env_score.between(-.05, .05), {}),        # neutral, default line colour
)
for env_mask, env_line_style in env_buckets:
    env_stamps[env_mask].resample("M").count().plot(**env_line_style)

plt.legend(["All tweets", "Positive tweets", "Negative tweets", "Neutral tweets"])
plt.ylabel("# of tweets per month")
plt.show()