commit 1315ff3d99ec7a876a5853f98870d193656acaed Author: Marcel Weschke Date: Wed May 20 12:01:39 2026 +0200 Initial setup steps diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..afed073 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.csv diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c13e6af --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Marcel Weschke + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2d02533 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +# Google Trends Market Sentiment Analysis Tool + +## Overview +Traditional market data captures what has happened, but rarely explains *why* or what happens next. This project introduces a systematic framework that leverages alternative data—specifically online search volumes via Google Trends—as a leading indicator for tactical asset allocation and risk control. 
+ +By analyzing real-time shifts in collective investor attention, the tool quantifies market psychology before it fully materializes into trading decisions. + +--- + +## The Core Scaling Challenge & Solution + +> **The Problem:** Google Trends normalizes search volume to a relative $0 \text{ to } 100$ scale *per individual request*. This makes it statistically impossible to directly compare or chain together data from different batch requests. +> +> **The Algorithmic Solution:** This script implements an **"Anchor-Logic"** to establish a unified global scale. Every automated batch request includes a high-volume, neutral reference term (configurable via `--anchor`, default: `'weather'`). The pipeline then rescales each further (target) batch onto the scale of the first (reference) batch using the **median ratio** of the overlapping anchor series: +> +> $$\text{Scaling Factor} = \text{median}\left(\frac{\text{Anchor}_{\text{Reference Batch}}}{\text{Anchor}_{\text{Target Batch}}}\right)$$ +> +> This technique achieves true cross-batch comparability across independent API calls. + +--- + +## Methodology & Pipeline Architecture + +The prototype (`google_trends_sentiment_prototype.py`) is structured as a modular quantitative pipeline: + +### 1. Data Ingestion (Anchor-Based) +Automated retrieval of pre-defined Risk-On, Risk-Off, and Macroeconomic keywords via the `pytrends` API, structurally unified globally using the Anchor-Logic described above. + +### 2. Normalization Layer +Applies a **Z-score transformation** to the rescaled raw data. This establishes statistical parity across keywords with vastly different structural search volumes by centering the mean at $0$ and scaling variance to $1$: + +$$z = \frac{x - \mu}{\sigma}$$ + +Where: +* $x$ is the anchor-adjusted search volume intensity. +* $\mu$ is the historical mean of that specific keyword series. +* $\sigma$ is the historical standard deviation of the series. + +### 3. 
Index Construction & Signal Extraction +* **Sentiment Spread:** Measures the relative strength of optimism versus pessimism in the market: + $$\text{Sentiment Spread} = \left( \frac{1}{N} \sum_{i=1}^{N} z_{\text{Risk-On}, i} \right) - \left( \frac{1}{M} \sum_{j=1}^{M} z_{\text{Risk-Off}, j} \right)$$ +* **Macro PCA Factor:** Extracts the first principal component ($PC_1$) from the combined Z-score feature matrix using Singular Value Decomposition (SVD) via `scikit-learn`: + $$\mathbf{Z} = \mathbf{U}\mathbf{\Sigma}\mathbf{V}^T \implies PC_1 = \mathbf{Z}\mathbf{v}_1$$ + This isolates the dominant underlying psychological driver capturing the highest common variance. + +### 4. Market Validation (Optional) +Resamples the extracted signals to a weekly frequency and performs quantitative correlation analysis against live financial benchmarks using `yfinance` without compromising the statistical independence of the core signal. + +*Note: This prototype currently focuses on contemporaneous correlation as a proof-of-concept. Time horizons and keyword definitions are structurally predefined rather than data-driven optimized.* + +--- + +## Getting Started + +### Dependencies +Install the required quantitative stack: +```bash +pip install pytrends pandas numpy scikit-learn yfinance matplotlib diff --git a/combined_sentiment_analysis_GLOBAL.png b/combined_sentiment_analysis_GLOBAL.png new file mode 100644 index 0000000..9f43c02 Binary files /dev/null and b/combined_sentiment_analysis_GLOBAL.png differ diff --git a/google_trends_sentiment_prototype.py b/google_trends_sentiment_prototype.py new file mode 100644 index 0000000..161a2b3 --- /dev/null +++ b/google_trends_sentiment_prototype.py @@ -0,0 +1,504 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Author: Marcel Weschke +Date: 2026-02-19 +Script Name: google_trends_sentiment_prototype.py + +Description: +------------ +Task: Prototyping a Market Sentiment Indicator using Google Trends. 
+This tool extracts and analyzes search query intensities as leading indicators +for risk management and tactical asset allocation. + +Scaling Challenge & Algorithmic Solution: +----------------------------------------- +Google Trends normalizes search volume to a 0-100 scale per individual request. +To ensure cross-batch comparability, this script implements an "Anchor-Logic": +- A common reference term (default: 'weather') is included in every request. +- Batches are rescaled using the median ratio of the anchor series to + establish a unified global scale. + +Objective: +---------- +Construct a robust sentiment factor by synthesizing search intensities +of risk-related and macroeconomic keywords. + +Methodology: +------------ +1) Data Ingestion: Automated retrieval of Risk-On, Risk-Off, and Macro terms. +2) Normalization: Applying Z-score transformations for cross-keyword statistical parity. +3) Index Construction: + - Sentiment Spread: (Avg Risk-On Z-score) - (Avg Risk-Off Z-score). + - Macro PCA Factor: Extraction of the first principal component (common variance). +4) Validation: Quantitative correlation analysis against market benchmarks (e.g., MSCI World). 
+ +Outputs: +-------- +- trends_raw_<prefix>.csv +- trends_features_<prefix>.csv +- sentiment_index_<prefix>.csv +- sentiment_plot_<prefix>.png / combined_sentiment_analysis_<prefix>.png + +Dependencies: +------------- +pip install pytrends pandas numpy scikit-learn yfinance matplotlib + +Usage Examples: +--------------- +python google_trends_sentiment_prototype.py --geo GLOBAL --no-plot +python google_trends_sentiment_prototype.py --geo GLOBAL --ticker URTH +python google_trends_sentiment_prototype.py --geo DE --ticker ^GDAXI +""" + +import argparse +import time +from dataclasses import dataclass +from typing import List, Tuple + +import numpy as np +import pandas as pd +from pytrends.request import TrendReq +from sklearn.decomposition import PCA +import yfinance as yf +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec + +pd.set_option('future.no_silent_downcasting', True) # Prevents a pandas FutureWarning + +# ------ +# Config +# ------ +@dataclass +class TrendsConfig: + """Configuration settings for the Google Trends API request.""" + timeframe: str = "today 5-y" # e.g. "today 12-m", "2019-01-01 2024-12-31" + geo: str = "" # ""=GLOBAL, "DE"=Germany, "US"=USA, ... 
+ hl: str = "en-US" + tz: int = 360 # Timezone offset in minutes (pytrends convention); 360 = US Central (UTC-6), not CET/CEST + cat: int = 0 # 0=all categories + gprop: str = "" # ""=web search, "news", "images", "youtube", "froogle" + retries: int = 5 + sleep_s: float = 1.0 + anchor: str = "weather" # Used to normalize across different keyword batches + batch_size: int = 4 # +1 anchor => max 5 keywords per pytrends call + + +# Keyword sets: for geo=DE, German synonyms may be added - default: GLOBAL +RISK_ON = ["buy stocks", "equity rally", "risk on", "emerging markets", "carry trade"] +#RISK_OFF = ["recession", "market crash", "credit spread", "default", "safe haven"] +RISK_OFF = ["energy crisis", "market crash", "credit spread", "debt ceiling", "trade war"] +MACRO = ["inflation", "interest rates", "central bank", "unemployment", "bond yields"] + + +# ------------------------- +# Utility: pytrends wrapper +# ------------------------- +def _chunks(xs: List[str], n: int) -> List[List[str]]: + """Split a list into smaller chunks of size n.""" + return [xs[i:i+n] for i in range(0, len(xs), n)] + + +def build_pytrends(cfg: TrendsConfig) -> TrendReq: + """Initialize the Pytrends request object.""" + return TrendReq(hl=cfg.hl, tz=cfg.tz) + + +def fetch_interest_over_time(pytrends: TrendReq, keywords: List[str], cfg: TrendsConfig) -> pd.DataFrame: + """ + Fetch search volume data for a specific keyword list with retry logic. + Returns a DataFrame with the search interest over the specified timeframe. 
+ """ + last_err = None + for attempt in range(1, cfg.retries + 1): + try: + pytrends.build_payload( + kw_list=keywords, + timeframe=cfg.timeframe, + geo=cfg.geo, + cat=cfg.cat, + gprop=cfg.gprop, + ) + df = pytrends.interest_over_time() + if df is None or df.empty: + raise RuntimeError("Empty response from Google Trends") + if "isPartial" in df.columns: + df = df.drop(columns=["isPartial"]) + return df + except Exception as e: + last_err = e + time.sleep(cfg.sleep_s * attempt) + raise RuntimeError(f"Failed to fetch trends after {cfg.retries} retries. Last error: {last_err}") + + +# -------------------------------- +# Core: Batch-Rescaling via Anchor +# -------------------------------- +def rescale_batches_via_anchor(batch_frames: List[pd.DataFrame], anchor: str) -> pd.DataFrame: + """ + Normalizes multiple API batches by using a common 'anchor' keyword. + This overcomes Google's 0-100 scaling limitation for different requests. + """ + if not batch_frames: + return pd.DataFrame() + + base = batch_frames[0].copy() + if anchor not in base.columns: + raise ValueError("Anchor not present in base batch") + + out = base.drop(columns=[anchor], errors="ignore") + base_anchor = base[anchor].replace(0, np.nan) + + for i in range(1, len(batch_frames)): + df = batch_frames[i].copy() + if anchor not in df.columns: + raise ValueError(f"Anchor not present in batch {i}") + a = df[anchor].replace(0, np.nan) + # Calculate the ratio between the current batch's anchor and the base batch + ratio = (base_anchor / a).replace([np.inf, -np.inf], np.nan) + scale = np.nanmedian(ratio.values) + if not np.isfinite(scale) or scale <= 0: + scale = 1.0 + + df_rescaled = df.drop(columns=[anchor], errors="ignore") * scale + out = out.join(df_rescaled, how="outer") + + return out.sort_index() + + +def fetch_trends_all_keywords(cfg: TrendsConfig, all_keywords: List[str]) -> pd.DataFrame: + """Orchestrates the fetching and rescaling of all keywords in batches.""" + pytrends = build_pytrends(cfg) + 
keywords = [kw for kw in all_keywords if kw.lower() != cfg.anchor.lower()] + batches = _chunks(keywords, cfg.batch_size) + + batch_frames = [] + for b in batches: + kw_list = b + [cfg.anchor] + df = fetch_interest_over_time(pytrends, kw_list, cfg) + batch_frames.append(df) + + rescaled = rescale_batches_via_anchor(batch_frames, cfg.anchor) + # weekly frequency alignment (Google Trends usually returns weekly for multi-year) + rescaled = rescaled.asfreq("W-SUN").ffill() + return rescaled + + +# ---------------------------- +# Features + Sentiment Indices +# ---------------------------- +def zscore(df: pd.DataFrame) -> pd.DataFrame: + """Calculate the Z-score (Standardization) for each column.""" + mu = df.mean(skipna=True) + sd = df.std(skipna=True).replace(0, np.nan) + return (df - mu) / sd + + +def ewma(df: pd.DataFrame, span: int = 8) -> pd.DataFrame: + """Apply Exponentially Weighted Moving Average to smooth the time series.""" + return df.ewm(span=span, adjust=False).mean() + + +def build_sentiment_indices(trends: pd.DataFrame, risk_on: list, risk_off: list): + """ + Calculates Z-Scores, applies EWMA smoothing, and performs PCA. + Includes a robustness check against keywords with no data (NaN/Zero-Variance). 
+ """ + features = pd.DataFrame(index=trends.index) + + # Calculate Z-Score & EWMA for each keyword + for col in trends.columns: + features[f"raw_{col}"] = trends[col] + # Z-Score Normalization + std = trends[col].std() + if std > 0: + z = (trends[col] - trends[col].mean()) / std + features[f"z_{col}"] = z + features[f"z_ewma_{col}"] = z.ewm(span=10).mean() + else: + # Handle keywords with zero variance or all NaN + features[f"z_{col}"] = np.nan + features[f"z_ewma_{col}"] = np.nan + + # Robustness-Check: Identify usable EWMA columns for PCA + z_ewma_cols = [f"z_ewma_{c}" for c in trends.columns] + + # Keep only columns that do not contain all NaNs and have variance > 0 + valid_cols = [] + for c in z_ewma_cols: + if c in features.columns and not features[c].isnull().all(): + if features[c].std() > 0: + valid_cols.append(c) + + if not valid_cols: + raise ValueError("No valid keyword data found for PCA calculation!") + + print(f"PCA-Input: Using {len(valid_cols)} of {len(z_ewma_cols)} keywords (rest had insufficient volume).") + + # PCA Calculation using only valid data + # Fill remaining NaNs with 0 for PCA stability, though valid_cols should be clean + pca_data = features[valid_cols].fillna(0) + pca = PCA(n_components=1) + features["sentiment_pca"] = pca.fit_transform(pca_data) + + # Correct PCA sign (should correlate positively with the average of Z-Scores) + if np.corrcoef(features["sentiment_pca"], pca_data.mean(axis=1))[0, 1] < 0: + features["sentiment_pca"] *= -1 + + # Difference Index (Risk-On vs Risk-Off) - only use valid columns + on_cols = [f"z_ewma_{c}" for c in risk_on if f"z_ewma_{c}" in valid_cols] + off_cols = [f"z_ewma_{c}" for c in risk_off if f"z_ewma_{c}" in valid_cols] + + avg_on = features[on_cols].mean(axis=1) if on_cols else 0 + avg_off = features[off_cols].mean(axis=1) if off_cols else 0 + features["sentiment_diff"] = avg_on - avg_off + + # Final Sentiment DataFrame for plotting + sentiment = features[["sentiment_pca", 
"sentiment_diff"]].copy() + + return features, sentiment + +# ------------------------------- +# Extra: +# Validation Feature: Correlation +# ------------------------------- +def validate_against_ticker(sentiment: pd.DataFrame, ticker: str, timeframe: str) -> float: + """ + Fetches ticker data, aligns schedules, and calculates correlation with sentiment_pca. + """ + print(f"Validating sentiment against ticker: {ticker}...") + + # Download daily ticker data to ensure we have enough data points + data = yf.download(ticker, period="5y", interval="1d") + + if data.empty: + print("Warning: Could not fetch ticker data.") + return 0.0 + + # Resample ticker data to weekly, taking the Friday close + ticker_weekly = data['Close'].resample('W-FRI').last() + + # Ensure sentiment data is also mapped to Friday for alignment + # Google Trends usually gives Sunday, so we shift it to Friday to match yfinance + sentiment_aligned = sentiment.copy() + sentiment_aligned.index = sentiment_aligned.index + pd.Timedelta(days=5) + + # Align dataframes (inner join ensures we only compare dates present in both) + combined = pd.concat([sentiment_aligned['sentiment_pca'], ticker_weekly], axis=1).dropna() + + if combined.empty: + print("Warning: Date alignment failed. Cannot calculate correlation.") + return 0.0 + + # Calculate Pearson Correlation + correlation = combined.corr().iloc[0, 1] + print(f"Correlation between Sentiment PCA and {ticker}: {correlation:.2f}") + + return correlation + + +def print_statistical_summary(features: pd.DataFrame): + """ + Prints a clean descriptive statistics summary to the terminal. + Focuses on the Z-Scores relevant for PCA analysis. 
+ """ + print(f"\n{'='*25} STATISTICAL ANALYSIS {'='*25}") + + # Filter columns representing the smoothed Z-Scores (PCA Input) + z_cols = [c for c in features.columns if c.startswith('z_ewma_')] + stats = features[z_cols].describe().transpose() + + # Filter keywords that provided valid data (std > 0 and no NaNs) + clean_stats = stats[stats['std'] > 0].dropna() + + print(f"\n[Key Metrics] Descriptive Statistics of Input Signals (Smoothed Z-Scores):") + if not clean_stats.empty: + # Display key metrics to validate normalization + print(clean_stats[['mean', 'std', 'min', 'max']].to_string(float_format=lambda x: f"{x:,.4f}")) + print(f"\nNote: Means close to 0 and Std Dev close to 1 validate successful normalization.") + else: + print("Note: No valid data found for statistical summary.") + + print(f"\n{'='*72}\n") + +# --------------------------------------------------------------------------- +# I/O + Plot (mapping Investor Psychology) +# - What people are searching for on Google -> a leading indicator +# - Example: If the Red Line drops sharply, it suggests that market anxiety +# is rising rapidly, which usually (can) precedes a drop in +# equity funds or ETFs. 
+# --------------------------------------------------------------------------- +def save_outputs(prefix: str, trends: pd.DataFrame, features: pd.DataFrame, sentiment: pd.DataFrame) -> None: + """Exports data to CSV files for further analysis in Excel or Bloomberg.""" + trends.to_csv(f"trends_raw_{prefix}.csv") + features.to_csv(f"trends_features_{prefix}.csv") + sentiment.to_csv(f"sentiment_index_{prefix}.csv") + +# Mapping for cleaner labels in plots and reports +TICKER_MAP = { + # --- Good Examples: Equity & Growth (High Risk Sensitivity) --- + "^GSPC": "S&P 500 (US Proxy)", + "URTH": "MSCI World (Global Proxy)", + "^GDAXI": "DAX 40 (EU/DE Proxy)", + "^STOXX50E": "Euro Stoxx 50 (EU Proxy)", + "^IXIC": "NASDAQ Composite (Growth/Tech Proxy)", + + # --- Good Examples: Risk Metrics (Volatility & Credit) --- + "^VIX": "CBOE Volatility Index (Fear Barometer - Expect Inverse Corr)", + "HYG": "iShares High Yield Corporate Bond ETF (Credit Risk)", + + # --- Less Good Examples (Specific/Inverse Drivers) --- + "BTC-USD": "Bitcoin (Speculative/Idiosyncratic)", + "GC=F": "Gold Futures (Safe Haven/Often Inverse)" +} + +def plot_sentiment(prefix: str, sentiment: pd.DataFrame, ticker: str = None, correlation: float = None) -> None: + """ + Generates the visualization. If a ticker is provided, a combined dual-axis plot is created; + otherwise, a single sentiment index plot is shown. + """ + # Reference the global TICKER_MAP. If ticker not found, use the raw ticker symbol. 
+ display_name = TICKER_MAP.get(ticker, ticker) + + # Determine filename and layout based on ticker presence + if ticker: + filename = f"combined_sentiment_analysis_{prefix}.png" + fig = plt.figure(figsize=(14, 10)) + gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1]) + else: + filename = f"sentiment_plot_{prefix}.png" + fig = plt.figure(figsize=(12, 7)) + gs = gridspec.GridSpec(1, 1) + + # --- Top Plot: Sentiment Indices (Always present) --- + ax1 = fig.add_subplot(gs[0]) + ax1.plot(sentiment.index, sentiment["sentiment_diff"], + label="Risk-On/Off Spread", color='royalblue', alpha=0.4, linewidth=1.5) + ax1.plot(sentiment.index, sentiment["sentiment_pca"], + label="Macro PCA Factor", color='crimson', linestyle='--', linewidth=2.5) + ax1.axhline(0, color='black', linewidth=1) + + title = f"Market Sentiment Index ({prefix})" + if ticker and correlation is not None: + title += f"\nValidation Correlation: {display_name} vs. PCA Factor = {correlation:.2f}" + + ax1.set_title(title, fontweight='bold', fontsize=14) + ax1.set_ylabel("Z-Score") + ax1.legend(loc='upper left') + ax1.grid(True, linestyle=':', alpha=0.6) + + # --- Bottom Plot: Ticker Comparison (Only if ticker is provided) --- + if ticker: + data = yf.download(ticker, start=sentiment.index.min(), end=sentiment.index.max()) + if not data.empty: + # Handle potential MultiIndex from yfinance + price_series = data['Close'][ticker] if isinstance(data.columns, pd.MultiIndex) else data['Close'] + + ax2 = fig.add_subplot(gs[1], sharex=ax1) + ax2.plot(price_series.index, price_series, color='darkgreen', linewidth=2, label=display_name) + + # Use .values.flatten() to avoid Pandas Series attribute errors + ax2.fill_between(price_series.index, price_series.values.flatten(), color='darkgreen', alpha=0.1) + + ax2.set_ylabel("Price / Index Level") + ax2.legend(loc='upper left') + ax2.grid(True, linestyle=':', alpha=0.6) + + plt.tight_layout() + plt.savefig(filename, dpi=300) + print(f"-> Plot saved as: {filename}") + 
plt.show() + +# ---- +# Main +# ---- +def parse_args(): + """Parses command-line arguments for tool configuration.""" + p = argparse.ArgumentParser( + description="Market Sentiment Analysis Tool using Google Trends and Ticker Correlation." + ) + p.add_argument( + "--geo", + type=str, + default="GLOBAL", + help="Geographic region code (ISO 3166-1 alpha-2). Use 'US', 'DE', etc. Default: 'GLOBAL'." + ) + p.add_argument( + "--ticker", + type=str, + default=None, + help="Yahoo Finance ticker symbol for validation (e.g., '^GSPC', 'URTH'). Default: None." + ) + p.add_argument( + "--timeframe", + type=str, + default="today 5-y", + help="Data duration. Use 'today 12-m', 'today 5-y', or 'YYYY-MM-DD YYYY-MM-DD'. Default: 'today 5-y'." + ) + p.add_argument( + "--gprop", + type=str, + default="", + help="Google property to filter (e.g., 'news', 'images', 'froogle', 'youtube'). Default: '' (Web Search)." + ) + p.add_argument( + "--anchor", + type=str, + default="weather", + help="Reference term used to rescale and link multiple keyword batches. Default: 'weather'." + ) + p.add_argument( + "--no-plot", + action="store_true", + help="Disable visual plot generation and only save CSV data. Default: False." 
+ ) + return p.parse_args() + + +def main(): + """Main execution flow for the sentiment analysis tool.""" + # Parse command-line arguments + args = parse_args() + + # Determine region and prefix + geo = "" if args.geo.upper() == "GLOBAL" else args.geo.upper() + prefix = "GLOBAL" if geo == "" else geo + + # Create config based on PARSED arguments + cfg = TrendsConfig( + geo=geo, + timeframe=args.timeframe, # Use parsed timeframe + gprop=args.gprop, # Use parsed gprop + anchor=args.anchor # Use parsed anchor + ) + + all_keywords = sorted(set(RISK_ON + RISK_OFF + MACRO)) + + print(f"Starting sentiment extraction for {prefix}...") + trends = fetch_trends_all_keywords(cfg, all_keywords) + features, sentiment = build_sentiment_indices(trends, RISK_ON, RISK_OFF) + + # Save CSV files (e.g. trends_raw_GLOBAL.csv / sentiment_index_GLOBAL.csv) + save_outputs(prefix, trends, features, sentiment) + + # Perform Validation if ticker is provided + corr = None + if args.ticker: + print(f"Validating against Ticker: {args.ticker}...") + corr = validate_against_ticker(sentiment, args.ticker, cfg.timeframe) + + save_outputs(prefix, trends, features, sentiment) + + # Handle plot result by --ticker and/or --no-plot flag call + if not args.no_plot: + # With Ticker flag -> combined_sentiment_analysis_GLOBAL.png + # Without Ticker flag -> sentiment_plot_GLOBAL.png + plot_sentiment(prefix, sentiment, args.ticker, corr) + + # Extra: Descriptive Statistics: + print_statistical_summary(features) + + print(f"--- Process complete. Files saved with prefix: {prefix} ---") + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d25f30c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +# Main requirements +pandas +pytrends +yfinance +scikit-learn