import json
import logging
import os
from datetime import datetime, timedelta
from pathlib import Path
from typing import Union
from .. import network_aiohttp, network_requests
from . import network
EXPIRY_TIME_FORMAT = "%Y/%m/%d, %H:%M:%S"
EXPIRE_HOURS = 12
logger = logging.getLogger("pmm")
# {url:{new-etag:"xxxx", file-size:12345, meta-etag:"uuuuu"}}
etag_and_file_size_cache = {}
from enum import Enum
[docs]
class HttpClient(Enum):
    """HTTP client backends that a downloader can be configured with."""

    # the numeric values are kept exactly as they were; other code may compare or persist them
    REQUESTS = 1
    AIOHTTP = 2
[docs]
class FileDownloader:
"""class for managing single file download"""
def __init__(
    self,
    file_url: str,
    meta_filepath: str,
    dst_dir: str,
    filename: Union[str, None] = None,
    auto_unzip: bool = True,
    expire_hours=EXPIRE_HOURS,
    expiry_time_format=EXPIRY_TIME_FORMAT,
    large_file_hint=False,
    timeout=(None, None),
    http_client: HttpClient = HttpClient.REQUESTS,
) -> None:
    """Record the download configuration on the instance.

    Nothing is read from disk or fetched from the network here; every
    argument is stored unchanged as an attribute of the same name.

    :param file_url: the url to the file
    :param meta_filepath: the path to the metadata file
    :param dst_dir: the destination to save the file
    :param filename: optional file name (presumably the name used for the
        saved file -- confirm against the download code)
    :param auto_unzip: stored as-is (presumably controls unpacking of
        archives after download -- confirm against the download code)
    :param expire_hours: stored as-is (presumably the lifetime, in hours,
        given to a freshly downloaded file -- confirm against the caller)
    :param expiry_time_format: ``strptime`` format used to parse the
        ``expiry`` value of the metadata file
    :param large_file_hint: stored as-is; its use is not visible here
    :param timeout: forwarded to the network helpers as ``timeout``
    :param http_client: which :class:`HttpClient` backend to use
    """
    # where the file comes from and where it goes
    self.file_url = file_url
    self.dst_dir = dst_dir
    self.filename = filename
    self.meta_filepath = meta_filepath

    # expiry handling
    self.expiry_time_format = expiry_time_format
    self.expire_hours = expire_hours

    # download behaviour
    self.http_client = http_client
    self.timeout = timeout
    self.auto_unzip = auto_unzip
    self.large_file_hint = large_file_hint

    # state discovered later, while checking for updates; unknown for now
    self.meta_etag = self.new_etag = None
    self.meta_sha256 = self.new_sha256 = None
    self.file_size = None
[docs]
def check_if_file_need_update(self):
    """Decide whether the target file should be downloaded again.

    Returns:
        bool: ``True`` when the file should be downloaded/re-downloaded,
        ``False`` when the existing local file can be reused.

    Decision flow:
        1. If the metadata file is missing, return ``True``.
        2. If the stored ``url`` differs from ``self.file_url`` (or is
           missing), return ``True``.
        3. If the metadata ``expiry`` is still valid, return ``False``.
        4. If expired (or expiry is invalid/missing), compare remote
           content state:
           - Prefer SHA-256 comparison when available.
           - Fall back to ETag comparison if SHA-256 cannot be obtained.
           - If neither reliable value is available, return ``True``.

    Side effects:
        When step 4 is reached, ``self.meta_sha256`` and
        ``self.new_sha256`` are set. When the ETag fallback is used,
        ``self.meta_etag``, ``self.new_etag`` and ``self.file_size`` are
        set as well, so that ``check_if_expire_date_need_update`` can tell
        afterwards whether the remote content was confirmed unchanged.

    Raises:
        OSError: if the metadata file exists but cannot be opened.
        json.JSONDecodeError: if the metadata file is not valid JSON.
    """
    #
    # first check if the metadata file exists
    # since metadata file is inside the layer folder, this check will also confirm the existence of the layer folder
    #
    if not os.path.isfile(self.meta_filepath):
        logger.debug(
            f"the metadata file({self.meta_filepath}) does not exist, need to download the file({self.file_url})"
        )
        return True

    with open(self.meta_filepath, "r") as f:
        meta = json.load(f)

    #
    # check if the "url" in the metafile matches the "layer file url"
    #
    if "url" not in meta:
        logger.debug(
            f"no url found in the metafile. to be on the safe side, re-download the file({self.file_url})"
        )
        return True
    if meta["url"] != self.file_url:
        logger.debug(
            f"the layer url has changed, re-download the file({self.file_url})"
        )
        return True

    #
    # now check the layer file's expiry date
    #
    expiry_date = None
    if "expiry" in meta:
        try:
            expiry_date = datetime.strptime(meta["expiry"], self.expiry_time_format)
        except (TypeError, ValueError):
            # a malformed or non-string expiry is treated like a missing one:
            # fall through to the sha256/etag comparison below
            logger.debug(
                "The expiry date in the metadata file is invalid. Check sha256 or etag."
            )

    if expiry_date is not None:
        now = datetime.now()
        if now <= expiry_date:
            # layer file has not expired yet, no need to check update
            logger.debug(
                f"The file has not expired yet (expiry date: {expiry_date}, now: {now}). No need to check sha256 or etag. Will use the local file."
            )
            return False
        logger.debug("The file expired. Check sha256 or etag.")

    #
    # expired, invalid or missing expiry: ask the server whether the content changed.
    # SHA-256 is preferred; a falsy result means it could not be obtained.
    #
    self.meta_sha256 = meta.get("sha256")
    self.new_sha256 = network.get_sha256(self.file_url, timeout=self.timeout)
    if self.new_sha256:
        if self.meta_sha256 == self.new_sha256:
            logger.debug(
                f"SHA-256 unchanged: {self.meta_sha256} matches {self.new_sha256}"
            )
            return False
        logger.debug(
            f"SHA-256 has changed or is missing in metadata. re-download the file({self.file_url})"
        )
        return True

    #
    # no SHA-256 available, fall back to the etag
    #
    if "etag" not in meta:
        logger.debug(
            f"no etag found in the metadata file, to be safe, re-download the file({self.file_url})"
        )
        return True

    # keep the stored etag on the instance (not just in a local variable) so that
    # check_if_expire_date_need_update can compare it against new_etag later
    self.meta_etag = meta["etag"]
    headers = network.get_headers(self.file_url)
    self.file_size = network.get_content_length(headers)
    self.new_etag = network.get_etag(headers)
    if self.meta_etag == self.new_etag:
        logger.debug(f"{self.meta_etag} -- {self.new_etag}")
        return False

    logger.debug(f"etag has been changed. re-download the file({self.file_url})")
    return True
[docs]
def check_if_expire_date_need_update(self):
    """Tell whether the expiry date of the local file should be refreshed.

    Returns ``True`` when a remote value has been fetched (``new_sha256``
    or ``new_etag`` is not ``None``) and it equals the corresponding value
    recorded from the metadata file, i.e. the remote content was checked
    and found unchanged. Returns ``False`` otherwise.
    """
    # the SHA-256 comparison is consulted first
    sha256_confirmed = (
        self.new_sha256 is not None and self.meta_sha256 == self.new_sha256
    )
    if sha256_confirmed:
        return sha256_confirmed

    # otherwise the answer depends on the etag comparison alone
    etag_confirmed = self.new_etag is not None and self.new_etag == self.meta_etag
    return etag_confirmed