aboutsummaryrefslogtreecommitdiffstats
path: root/sql/graph_builder.py
blob: df108b34c34ae90b028c72b9264189c5df33b281 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import plotly.graph_objs as go
import pandas as pd
import mysql.connector
from mysql.connector import errorcode
import warnings
import math
from datetime import datetime, timedelta
import numpy as np


def plot_subscriber_count_over_time(host, username, password, database_name, table_name,
                                    gtitle = "Subscriber Count Over Time for Nijisanji Members",
                                    overrideQuery = None, markers = "lines", exclude_channels = []):
    warnings.filterwarnings('ignore')  # Ignore pandas warning regarding pyodbc
    try:
        cnx = mysql.connector.connect(user = username, password = password,
                                      host = host, database = database_name)
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Something is wrong with your user name or password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist")
        else:
            print(err)
    query = f"SELECT name, subscriber_count, timestamp, channel_id FROM {table_name} ORDER by timestamp DESC" if overrideQuery is None else overrideQuery
    df = pd.read_sql_query(query, cnx)
    groups = df.groupby("name")
    fig = go.Figure()
    config = dict({'responsive': True, 'displaylogo': False})
    for channel, group in groups:
        if len(exclude_channels) != 0 and group['channel_id'].iloc[0] in exclude_channels:
            continue
        fig.add_trace(go.Scattergl(
            x = group["timestamp"], y = group["subscriber_count"], name = channel, mode = markers,
            showlegend = True))

    fig.update_layout(
        title = {'text': gtitle, 'x': 0.5, 'xanchor': 'center',
                 'yanchor': 'top', 'font': {'family': 'Arial', 'size': 30}},
        xaxis_title = "Timestamp",
        yaxis_title = "Subscribers",
        legend = dict(font = dict(size = 16), title = dict(text = "Channels")),
        height = 950
    )
    return fig.to_html(config = config)


def generate_projection(host, username, password, database_name, table_name, curr_subscribers,
                        timezone = "Pacific Standard Time"):
    def get_next_milestone(subscriber_count):
        num_digits = len(str(subscriber_count))
        if num_digits <= 4:
            milestone_interval = 10000
        else:
            milestone_interval = 10 ** (num_digits - 1)
        next_milestone = math.ceil(
            subscriber_count / milestone_interval) * milestone_interval

        return next_milestone

    warnings.filterwarnings('ignore')  # Ignore pandas warning regarding pyodbc
    try:
        cnx = mysql.connector.connect(user = username, password = password,
                                      host = host, database = database_name)
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Something is wrong with your user name or password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist")
        else:
            print(err)

    def create_milestone_card(time_until_milestone, next_milestone, not_enough_data = False,
                              declining = False):
        now = datetime.now()
        milestone_date = (
                now + timedelta(seconds = time_until_milestone)).strftime('%Y-%m-%d')
        relative_time = now + timedelta(seconds = time_until_milestone) - now
        next_milestone_str = "{:,}".format(next_milestone)
        if relative_time.days > 1:
            relative_time_str = f"In {relative_time.days} days"
        elif relative_time.days == 1:
            relative_time_str = f"In {relative_time.days} day"
        elif relative_time.days < 0:
            relative_time_str = f"{-relative_time.days} days ago"
        elif not_enough_data:
            relative_time_str = "Not enough data"
        elif declining:
            relative_time_str = "Declining"
        else:
            relative_time_str = "Today"
        card = f"""
        <div style="background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 0 10px rgba(0, 0, 0, 0.2); font-family: 'Poppins', sans-serif;">
            <div style="font-size: 24px; font-weight: bold; margin-bottom: 10px;">Next Milestone</div>
            <div style="font-size: 18px; margin-bottom: 10px;">{next_milestone_str}</div>
            <div style="display: flex; flex-direction: row; justify-content: space-between; margin-bottom: 10px;">
                <div style="font-size: 16px;">Estimated Date:</div>
                <div style="font-size: 16px; font-weight: bold;">{milestone_date}</div>
            </div>
            <div style="display: flex; flex-direction: row; justify-content: space-between; color: #888;">
                <div style="font-size: 16px;">{relative_time_str}</div>
                <div style="font-size: 16px;">{timezone}</div>
            </div>
        </div>
        <br>
        """

        return card

    query = f"SELECT name, subscriber_count, timestamp FROM {table_name} WHERE timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) ORDER BY timestamp DESC"
    df = pd.read_sql_query(query, cnx)
    df = df.sort_values(by = 'timestamp')
    # get the rate of change from first data point to last data point
    first_data = df.iloc[0]
    last_data = df.iloc[-1]
    delta_sub_count = last_data['subscriber_count'] - first_data['subscriber_count']
    delta_time = (last_data['timestamp'] - first_data['timestamp']).total_seconds()

    # Calculate the average rate of change of subscriber_count over time
    avg_rate_of_change = delta_sub_count / delta_time
    next_milestone = get_next_milestone(curr_subscribers)
    if avg_rate_of_change == 0 or math.isnan(avg_rate_of_change) or math.isinf(avg_rate_of_change):
        return create_milestone_card(0, next_milestone, not_enough_data = True)
    if avg_rate_of_change < 0:
        return create_milestone_card(0, next_milestone, declining = True)
    time_to_next_milestone = (
                                     next_milestone - curr_subscribers) / avg_rate_of_change
    return create_milestone_card(time_to_next_milestone, next_milestone)
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage