Module nepse_tools.scraper.price_scraper
import datetime
import os.path
from typing import Iterable
import pandas as pd
from decouple import config
from nepse_tools.scraper.price_scraper.scraper import PriceScraper
from nepse_tools.share_market.indicators.base_indicator import DataColumns
from nepse_tools.utils.logger import logger

def date_range(start: str | datetime.date, end: str | datetime.date | None = None):
    # A `datetime.datetime.now().date()` default would be evaluated once at
    # import time and go stale, so the default is resolved per call instead.
    if end is None:
        end = datetime.datetime.now().date()
    start_datetime = start
    end_datetime = end
    time_delta = datetime.timedelta(days=1)
    if isinstance(start, str):
        start_datetime = datetime.datetime.strptime(start, "%Y-%m-%d").date()
    if isinstance(end, str):
        end_datetime = datetime.datetime.strptime(end, "%Y-%m-%d").date()
    # `start` itself is skipped: the first date yielded is `start` + 1 day,
    # the last is `end` (inclusive).
    while start_datetime < end_datetime:
        start_datetime += time_delta
        yield start_datetime

def save_data_to_csv(
    date_generator: Iterable | None = None,
    csv_path: str = config("SHARE_PRICE_STORAGE_LOCATION")
):
    price_scraper = PriceScraper()
    scraped_data = {key: [] for key in price_scraper.share_price_keys}
    # Load any previously scraped rows so new data is appended, not overwritten.
    scraped_data_df = pd.DataFrame()
    if os.path.exists(csv_path):
        scraped_data_df = pd.read_csv(
            csv_path,
            converters=DataColumns.COLUMN_DATA_TYPE_CONVERTER
        )
    # With no explicit range, resume from the last date already on disk.
    if date_generator is None:
        if scraped_data_df.empty:
            raise ValueError(
                f"No existing data at `{csv_path}`: pass an explicit `date_generator`."
            )
        last_date = scraped_data_df.tail(1)["date"].values[0]
        date_generator = date_range(last_date, datetime.datetime.now().date())
    for date in date_generator:
        logger.info(f"Scraping `{date}`")
        if price_data := price_scraper.parse_share_price(date=date):
            for key in scraped_data:
                scraped_data[key].extend(price_data[key])
            logger.success(f"Scraped `{date}`")
        else:
            logger.error(f"No Data Available For `{date}`")
    # Deduplicate on the data columns only; `drop=True` keeps the old index
    # out of the frame so it cannot defeat `drop_duplicates()`.
    scraped_data_df = pd.concat(
        [scraped_data_df, pd.DataFrame(scraped_data)]
    ).drop_duplicates().reset_index(drop=True)
    # Re-apply each converter as a sanity check; a malformed value raises here.
    for col in DataColumns.COLUMN_DATA_TYPE_CONVERTER:
        logger.info(f"Datatype verification on `{col}`")
        scraped_data_df[col].apply(DataColumns.COLUMN_DATA_TYPE_CONVERTER[col])
        logger.success(f"Data verified for `{col}`")
    logger.info(f"Saving data to csv at: `{csv_path}`")
    scraped_data_df.to_csv(csv_path, columns=price_scraper.share_price_keys, index=False)
    logger.success(f"Data saved to csv at: `{csv_path}`")
Sub-modules
nepse_tools.scraper.price_scraper.scraper
Functions
def date_range(start: str | datetime.date, end: str | datetime.date | None = None)
Yield each date after `start`, one day at a time, up to and including `end`. String arguments are parsed as `%Y-%m-%d`; `end` defaults to the current date.
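A quick illustration of the generator's behaviour; note that `start` itself is not yielded, while `end` is:

>>> from nepse_tools.scraper.price_scraper import date_range
>>> list(date_range("2022-03-08", "2022-03-11"))
[datetime.date(2022, 3, 9), datetime.date(2022, 3, 10), datetime.date(2022, 3, 11)]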
def save_data_to_csv(date_generator: Iterable | None = None, csv_path: str = config("SHARE_PRICE_STORAGE_LOCATION"))
Scrape daily share prices and append them to the CSV at `csv_path`. When `date_generator` is omitted, scraping resumes from the day after the last date already present in the file. New rows are deduplicated against existing ones, and every column is re-checked with its converter before saving. `csv_path` defaults to the `SHARE_PRICE_STORAGE_LOCATION` setting (rendered as `data/scraped_data/share_price.csv` when these docs were built).
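A minimal usage sketch, assuming `SHARE_PRICE_STORAGE_LOCATION` is set (python-decouple resolves it from a `.env` file or the environment) and that `PriceScraper` can reach the price source; the backfill range below is illustrative:

from nepse_tools.scraper.price_scraper import date_range, save_data_to_csv

# Backfill an explicit range into a custom file.
save_data_to_csv(
    date_generator=date_range("2022-01-01", "2022-03-11"),
    csv_path="data/scraped_data/share_price.csv",
)

# Incremental update: resume from the last date already in the default CSV.
save_data_to_csv()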