Source code for planetmapper.kernel_downloader

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Utility to help downloading spice kernels.

Will download local copy of kernels with same directory structure as on 
https://naif.jpl.nasa.gov/. Use :func:`planetmapper.set_kernel_path` to choose the
location that the kernels are downloaded to.

These functions can be used to download a set of URLS. For example: ::

    from planetmapper.kernel_downloader import download_urls

    # Download all kernel files in generic_kernels/pck
    download_urls('https://naif.jpl.nasa.gov/pub/naif/generic_kernels/pck/')

    # Download specific kernel file
    download_urls('https://naif.jpl.nasa.gov/pub/naif/generic_kernels/lsk/naif0012.tls')

    # Download multiple sets of kernel files
    download_urls(
        'https://naif.jpl.nasa.gov/pub/naif/generic_kernels/spk/planets/',
        'https://naif.jpl.nasa.gov/pub/naif/generic_kernels/spk/satellites/',
    )

"""
import os
import urllib.parse
import urllib.request

import tqdm

from . import utils
from .base import get_kernel_path

URL_ROOT = 'https://naif.jpl.nasa.gov/pub/'


[docs]def download_urls(*urls: str, **kwargs) -> None: """ Download data from naif.jpl.nasa.gov and save locally. urls can either be a the url of a single kernel, or the index page containing multiple kernels. If a single kernel, download the kernel using download_kernel(). If an index page, download all first-level files using :func:`download_kernels_from_webpage`. Args: urls: kernel URL on naif.jpl.nasa.gov. **kwargs: passed to :func:`download_kernel` and :func:`download_kernels_from_webpage`. """ for url in urls: # look for '.' in filename part of url to identify if a file/directory path = urllib.parse.urlsplit(url).path if '.' in os.path.split(path)[1]: download_kernel(url, **kwargs) else: download_kernels_from_webpage(url, **kwargs)
[docs]def download_kernels_from_webpage(index_url: str, **kwargs) -> None: """ Download all first-level kernels listed in the page given by index_url. URL must be on https://naif.jpl.nasa.gov/pub/. This will break if JPL changes the format of the webpage. .. warning :: This function will only download kernels found immediately on `index_url`. Kernels in nested folders must therefore be downloaded manually. Args: index_url: URL of index page on naif.jpl.nasa.gov. **kwargs: passed to :func:`download_kernel`. """ urls = get_kernel_paths_from_webpage(index_url) print(f'{len(urls)} to download from {index_url}') for idx, url in enumerate(urls): download_kernel(url, note=f'[{idx+1}/{len(urls)}] ', **kwargs) print(f'All kernels downloaded from {index_url}') print()
[docs]def download_kernel(url: str, force_download: bool = False, note: str = '') -> None: """ Download single kernel given by url. URL must be on https://naif.jpl.nasa.gov/pub/. By default will only download file if if does not already exist locally. Set `force_download=True` to override this check and download the file even if it already exists locally. Args: url: URL of kernel on naif.jpl.nasa.gov. force_download: toggle overwriting already downloaded kernels. note: string to include in progress message. """ kp = _get_kernel_path(url) print(f'{note}Checking {kp}') if _check_kernel_exists_locally(url): if force_download: print(' Kernel already exists, downloading anyway') else: print(' OK - Kernel already exists locally') return local_path = _convert_url_to_local_path(url) print(f' Downloading to {local_path}') download_file(url, local_path) print(' Done')
[docs]def get_kernel_paths_from_webpage(index_url: str) -> list[str]: """ Get list of kernel urls from an index page on https://naif.jpl.nasa.gov/pub/. This is a bit of a hack and will break if JPL changes the format of the webpage. Args: index_url: URL of webpage. Returns: List of URL strings corresponding to kernels on the webpage. """ # pylint: disable=consider-using-with assert index_url.startswith(URL_ROOT), f'URL must begin with {URL_ROOT}' webpage = urllib.request.urlopen(index_url).read().decode() data = webpage.split('<!--start data_content-->')[1].split('</table>')[0] lines = data.splitlines() # get lines from table paths = [] for l in lines: if not l.startswith('<img src="/icons/'): continue # ignore irrelevant lines from table href = l.split('<a href="')[1].split('"')[0] # find links from table if '.' in href: p = index_url + '/' + href # create url from link paths.append(p) return paths
def _check_kernel_exists_locally(url: str) -> bool: """Test if kernel file already exists on local filesystem.""" local_path = _convert_url_to_local_path(url) return os.path.exists(local_path) def _convert_url_to_local_path(url: str) -> str: """Convert a url on https://naif.jpl.nasa.gov to the equavilent local path.""" assert url.startswith(URL_ROOT), f'URL must begin with {URL_ROOT}' kp = _get_kernel_path(url) return _kernel_path_to_local_path(kp) def _standardise_path(p: str) -> str: """Make a standardised version of path.""" return os.path.normpath(os.path.expanduser(p)) def _get_kernel_path(p: str) -> str: """ Get the useful part of the path from a URL/local filepath. For example both 'https://naif.jpl.nasa.gov/pub/naif/generic_kernels/spk/satellites/' and '~/spice/naif/generic_kernels/spk/satellites/' are converted into 'naif/generic_kernels/spk/satellites' """ p = _standardise_path(p) for prefix in (URL_ROOT, get_kernel_path()): prefix = _standardise_path(prefix) if p.startswith(prefix): return _standardise_path(os.path.relpath(p, prefix)) raise ValueError('Cannot get kernel path from "{}"'.format(p)) def _kernel_path_to_url(kp: str) -> str: """Create URL from a kernel path""" return URL_ROOT + kp def _kernel_path_to_local_path(kp: str) -> str: """Create a local path from a kernel path""" return _standardise_path(get_kernel_path() + os.path.sep + kp)
[docs]def download_file(url: str, local_path: str) -> None: """ Download kernel file to local system. Args: url: URL of kernel file. local_path: File path to save kernel file on local system. """ utils.check_path(local_path) # download to temp file so don't get issues from partial downloads being killed temp_path = local_path + '.temp' urllib.request.urlretrieve(url, temp_path, reporthook=_DownloadProgressBar()) # once fully downloaded, we can safely move the temp file to the desired path os.replace(temp_path, local_path)
class _DownloadProgressBar: """ Shows download progress with tqdm """ def __init__(self): self.pbar = None self.previous_downloaded = 0 def __call__(self, block_num, block_size, total_size): if not self.pbar: self.pbar = tqdm.tqdm( total=total_size, unit_scale=True, unit='B', unit_divisor=1024 ) downloaded = block_num * block_size change = downloaded - self.previous_downloaded self.previous_downloaded = downloaded self.pbar.update(change)