# Search parameters that are passed as the query string of the databrowser requests.
search_params = dict(dataset="cmip6-fs", project="cmip6")
# Base URL of the test server that all requests are sent to.
url = "http://localhost:7777"
from getpass import getpass
import requests
from tempfile import NamedTemporaryFile
import xarray as xr

# Query the databrowser file search and list the lines of the streamed response.
search_response = requests.get(
    f"{url}/api/databrowser/data_search/freva/file",
    params=search_params,
    stream=True,
    # Without a timeout, requests waits indefinitely for an unresponsive server.
    timeout=30,
)
# Fail loudly on 4xx/5xx instead of listing the lines of an error body.
search_response.raise_for_status()
list(search_response.iter_lines(decode_unicode=True))

['/home/wilfred/workspace/freva-nextgen/freva-rest/src/databrowser_api/mock/data/model/global/cmip6/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/amip/r2i1p1f1/Amon/ua/gn/v20190815/ua_mon_MPI-ESM1-2-LR_amip_r2i1p1f1_gn_197901-199812.nc',
 '/home/wilfred/workspace/freva-nextgen/freva-rest/src/databrowser_api/mock/data/model/global/cmip6/CMIP6/CMIP/CSIRO-ARCCSS/ACCESS-CM2/amip/r1i1p1f1/Amon/ua/gn/v20201108/ua_Amon_ACCESS-CM2_amip_r1i1p1f1_gn_197901-201412.nc']

# Request an access token from the auth endpoint; the password is prompted
# interactively so that it never ends up in the notebook.
auth_response = requests.post(
    f"{url}/api/auth/v2/token",
    data={"username": "janedoe", "password": getpass("Password: ")},
    # Without a timeout, requests waits indefinitely for an unresponsive server.
    timeout=30,
)
# A rejected login should raise here, not later as a KeyError on
# auth['access_token'].
auth_response.raise_for_status()
auth = auth_response.json()

# Call the load endpoint with the bearer token obtained above. The response
# is read line by line into ``zarr_files``.
res = requests.get(
    f"{url}/api/databrowser/load/freva",
    params=search_params,
    headers={
        "Authorization": f"Bearer {auth['access_token']}"
    },
    stream=True,
    # Without a timeout, requests waits indefinitely for an unresponsive server.
    timeout=30,
)
# Stop on an HTTP error (e.g. expired token) rather than treating the error
# body as a list of zarr URLs.
res.raise_for_status()

zarr_files = list(res.iter_lines(decode_unicode=True))
zarr_files

['http://localhost:7777/api/freva-data-portal/zarr/dcb608a0-9d77-5045-b656-f21dfb5e9acf.zarr',
 'http://localhost:7777/api/freva-data-portal/zarr/f56264e3-d713-5c27-bc4e-c97f15b5fe86.zarr']

# Open the first entry of ``zarr_files`` with the zarr engine. The bearer token
# is handed to the storage backend as an HTTP header via ``storage_options``.
zarr_headers = {"Authorization": f"Bearer {auth['access_token']}"}
dset = xr.open_dataset(
    zarr_files[0],
    storage_options={"headers": zarr_headers},
    chunks="auto",
    engine="zarr",
)
dset

<xarray.Dataset>
Dimensions:    (lat: 27, bnds: 2, lon: 43, plev: 19, time: 11)
Coordinates:
  * lat        (lat) float64 0.9326 2.798 4.663 6.528 ... 43.83 45.7 47.56 49.43
  * lon        (lon) float64 101.2 103.1 105.0 106.9 ... 174.4 176.2 178.1 180.0
  * plev       (plev) float64 1e+05 9.25e+04 8.5e+04 7e+04 ... 1e+03 500.0 100.0
  * time       (time) datetime64[ns] 1979-01-16T12:00:00 ... 1979-11-16
Dimensions without coordinates: bnds
Data variables:
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(27, 2), meta=np.ndarray>
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(43, 2), meta=np.ndarray>
    time_bnds  (time, bnds) datetime64[ns] dask.array<chunksize=(11, 2), meta=np.ndarray>
    ua         (time, plev, lat, lon) float32 dask.array<chunksize=(11, 19, 27, 43), meta=np.ndarray>
Attributes: (12/47)
    CDI:                   Climate Data Interface version 2.0.6 (https://mpim...
    source:                MPI-ESM1.2-LR (2017): \naerosol: none, prescribed ...
    institution:           Max Planck Institute for Meteorology, Hamburg 2014...
    Conventions:           CF-1.7 CMIP-6.2
    activity_id:           CMIP
    branch_method:         no parent
    ...                    ...
    variable_id:           ua
    variant_label:         r2i1p1f1
    license:               CMIP6 model data produced by MPI-M is licensed und...
    cmor_version:          3.5.0
    tracking_id:           hdl:21.14100/0898c2ad-5382-4d0c-8adb-2ca96387fb54
    CDO:                   Climate Data Operators version 2.0.6 (https://mpim...

array([ 0.93263 ,  2.79789 ,  4.66315 ,  6.528409,  8.393669, 10.258928,
       12.124187, 13.989446, 15.854704, 17.719962, 19.585219, 21.450475,
       23.315731, 25.180986, 27.046239, 28.911492, 30.776744, 32.641994,
       34.507243, 36.372491, 38.237736, 40.102979, 41.96822 , 43.833459,
       45.698694, 47.563926, 49.429154])

array([101.25 , 103.125, 105.   , 106.875, 108.75 , 110.625, 112.5  , 114.375,
       116.25 , 118.125, 120.   , 121.875, 123.75 , 125.625, 127.5  , 129.375,
       131.25 , 133.125, 135.   , 136.875, 138.75 , 140.625, 142.5  , 144.375,
       146.25 , 148.125, 150.   , 151.875, 153.75 , 155.625, 157.5  , 159.375,
       161.25 , 163.125, 165.   , 166.875, 168.75 , 170.625, 172.5  , 174.375,
       176.25 , 178.125, 180.   ])

array([100000.,  92500.,  85000.,  70000.,  60000.,  50000.,  40000.,  30000.,
        25000.,  20000.,  15000.,  10000.,   7000.,   5000.,   3000.,   2000.,
         1000.,    500.,    100.])

array(['1979-01-16T12:00:00.000000000', '1979-02-15T00:00:00.000000000',
       '1979-03-16T12:00:00.000000000', '1979-04-16T00:00:00.000000000',
       '1979-05-16T12:00:00.000000000', '1979-06-16T00:00:00.000000000',
       '1979-07-16T12:00:00.000000000', '1979-08-16T12:00:00.000000000',
       '1979-09-16T00:00:00.000000000', '1979-10-16T12:00:00.000000000',
       '1979-11-16T00:00:00.000000000'], dtype='datetime64[ns]')

PandasIndex(Index([ 0.932629967837991,  2.797889876956741,  4.663149706177884,
       6.5284094014799905,  8.393668907692383, 10.258928168006376,
       12.124187123455766, 13.989445712356673, 15.854703869694873,
       17.719961526447428,  19.58521860882233, 21.450475037398185,
        23.31573072614093, 25.180985581270594,  27.04623949994481,
        28.91149236871774,  30.77674406172325,  32.64199443851768,
        34.50724334150103,  36.37249059281224,  38.23773599056483,
         40.1029793042494,  41.96822026907538,  43.83345857895126,
       45.698693877701785,  47.56392574797867,  49.42915369712305],
      dtype='float64', name='lat'))

PandasIndex(Index([ 101.25, 103.125,   105.0, 106.875,  108.75, 110.625,   112.5, 114.375,
        116.25, 118.125,   120.0, 121.875,  123.75, 125.625,   127.5, 129.375,
        131.25, 133.125,   135.0, 136.875,  138.75, 140.625,   142.5, 144.375,
        146.25, 148.125,   150.0, 151.875,  153.75, 155.625,   157.5, 159.375,
        161.25, 163.125,   165.0, 166.875,  168.75, 170.625,   172.5, 174.375,
        176.25, 178.125,   180.0],
      dtype='float64', name='lon'))

PandasIndex(Index([100000.0,  92500.0,  85000.0,  70000.0,  60000.0,  50000.0,  40000.0,
        30000.0,  25000.0,  20000.0,  15000.0,  10000.0,   7000.0,   5000.0,
         3000.0,   2000.0,   1000.0,    500.0,    100.0],
      dtype='float64', name='plev'))

PandasIndex(DatetimeIndex(['1979-01-16 12:00:00', '1979-02-15 00:00:00',
               '1979-03-16 12:00:00', '1979-04-16 00:00:00',
               '1979-05-16 12:00:00', '1979-06-16 00:00:00',
               '1979-07-16 12:00:00', '1979-08-16 12:00:00',
               '1979-09-16 00:00:00', '1979-10-16 12:00:00',
               '1979-11-16 00:00:00'],
              dtype='datetime64[ns]', name='time', freq=None))

# Average ``ua`` over the lon and lat dimensions, then plot the result with
# time on the x-axis and a decreasing y-axis.
ua_area_mean = dset["ua"].mean(dim=("lon", "lat"))
ua_area_mean.plot(x="time", yincrease=False)

<matplotlib.collections.QuadMesh at 0x7f7b7ecefad0>

import intake

# Request the same search again with the catalogue type set to "intake".
# NOTE: this mutates ``search_params`` in place, so the extra key stays set
# for any later request that reuses the dictionary.
search_params["catalogue-type"] = "intake"
res = requests.get(
    f"{url}/api/databrowser/load/freva",
    params=search_params,
    headers={
        "Authorization": f"Bearer {auth['access_token']}"
    },
    # The whole body is read via ``res.text`` below, so streaming is not needed.
    # Without a timeout, requests waits indefinitely for an unresponsive server.
    timeout=30,
)
# Do not write an HTTP error body into the catalogue file.
res.raise_for_status()

# intake is given a file path, so the response text is written to a temporary
# JSON file. Writing through the temp file's own handle avoids opening the
# same file a second time; flush() makes the content visible on disk before
# intake reads it.
with NamedTemporaryFile(mode="w", suffix=".json", encoding="utf-8") as temp_f:
    temp_f.write(res.text)
    temp_f.flush()
    cat = intake.open_esm_datastore(temp_f.name)
cat.df

from freva_client import authenticate, databrowser

# Same search as above, this time through the freva client library:
# build the query, authenticate, then list the results of the query.
data_query = databrowser(
    dataset="cmip6-fs",
    host="localhost:7777",
    stream_zarr=True,
)
# NOTE(review): the returned token is not passed on explicitly; presumably
# the client keeps it for subsequent requests - confirm.
token = authenticate(
    username="janedoe",
    host="localhost:7777",
)
files = list(data_query)
files

['http://localhost:7777/api/freva-data-portal/zarr/dcb608a0-9d77-5045-b656-f21dfb5e9acf.zarr',
 'http://localhost:7777/api/freva-data-portal/zarr/f56264e3-d713-5c27-bc4e-c97f15b5fe86.zarr']

# Build an intake catalogue from the client query object and show its
# DataFrame view.
cat = data_query.intake_catalogue()
cat.df

Loading Data via Zarr Endpoints¶

Workflow¶

"Order" the zarr datasets.¶

Open the zarr datasets¶

Creating intake catalogues¶

Using the freva client library¶

How does it work?¶

What's next?¶

	uri	project	product	institute	model	experiment	time_frequency	realm	variable	ensemble	cmor_table	fs_type	grid_label
0	http://localhost:7777/api/freva-data-portal/za...	CMIP6	CMIP	MPI-M	MPI-ESM1-2-LR	amip	mon	atmos	ua	r2i1p1f1	Amon	posix	gn
1	http://localhost:7777/api/freva-data-portal/za...	CMIP6	CMIP	CSIRO-ARCCSS	ACCESS-CM2	amip	mon	atmos	ua	r1i1p1f1	Amon	posix	gn