Coverage for toardb/timeseries/schemas.py: 91%
367 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-03 20:32 +0000
1# SPDX-FileCopyrightText: 2021 Forschungszentrum Jülich GmbH
2# SPDX-License-Identifier: MIT
4"""
5Pydantic schemas for TOAR database
7"""
9from typing import List, Any, Union
11from pydantic import BaseModel, Json, validator, Field
12import datetime as dt
13from toardb.contacts.schemas import Contact
14from toardb.variables.schemas import Variable
15from toardb.stationmeta.schemas import Stationmeta, Coordinates, get_full_description_from_abbreviation
16from toardb.utils.utils import get_displaystr_from_value
17import toardb
19# ======== Timeseries =========
class TimeseriesCoreBase(BaseModel):
    """Core metadata of a timeseries (output schema).

    Numeric controlled-vocabulary codes coming from the database are
    translated to their human-readable display strings by the validators
    below.
    """
    id: int = None
    label: str = Field(None, description="a short string to distinguish this timeseries from others with the same combination of station and variable")
    order: int = Field(None, description="indicates position of this timeseries in a list when several timeseries share the same station and variable combination")
    sampling_frequency: str = Field(None, description="Sampling frequency of data in this timeseries (see controlled vocabulary: Sampling Frequency)")
    aggregation: str = Field(None, description="Aggregation type in this timeseries (see controlled vocabulary: Aggregation Type)")
    data_start_date: dt.datetime = Field(None, description="Start date of the variable data available for this station")
    data_end_date: dt.datetime = Field(None, description="End date of the variable data available for this station")
    data_origin: str = Field(None, description="origin of data (model name or instrument) (see controlled vocabulary: Data Origin)")
    data_origin_type: str = Field(None, description="type of data origin (see controlled vocabulary: Data Origin Type)")
    provider_version: str = Field(None, description="provider data version")
    sampling_height: float = Field(None, description="Height above the ground of the inlet/instrument/sampler (in m)")
    additional_metadata: Json = Field(None, description="Additional information about the timeseries as JSON structure.")
    # only for internal use
    data_license_accepted: dt.datetime = Field(None, description="date when provider accepted the data license agreement")
    dataset_approved_by_provider: dt.datetime = Field(None, description="date when provider approved the data")
    doi : str = Field(None, description="DOI of the data")
    coverage: float = Field(None, description="data coverage between the time series data_start_date and data_end_date")

# still missing: "Score values from automated data QA (5-star evaluation)"
# still missing: "detailed report of timeseries QA/QC evaluation"
# still missing: "arbitrary additional information about this timeseries"

    @validator('sampling_frequency')
    def check_sampling_frequency(cls, v):
        # translate numeric code -> display string; falsy input passes through as None
        if not v:
            return None
        return [entry.display_str for entry in toardb.toardb.SF_vocabulary if entry.value == int(v)][0]

    @validator('aggregation')
    def check_aggregation(cls, v):
        if not v:
            return None
        return [entry.display_str for entry in toardb.toardb.AT_vocabulary if entry.value == int(v)][0]

    @validator('data_origin_type')
    def check_data_origin_type(cls, v):
        if not v:
            return None
        return [entry.display_str for entry in toardb.toardb.OT_vocabulary if entry.value == int(v)][0]

    @validator('data_origin')
    def check_data_origin(cls, v):
        if not v:
            return None
        return [entry.display_str for entry in toardb.toardb.DO_vocabulary if entry.value == int(v)][0]

    @validator('additional_metadata')
    def check_additional_metadata(cls, v):
        # translate the vocabulary-coded entries of the JSON structure in place
        if v:
            coded_keys = {
                'sampling_type': toardb.toardb.KS_vocabulary,
                'absorption_cross_section': toardb.toardb.CS_vocabulary,
                'calibration_type': toardb.toardb.CT_vocabulary,
            }
            for key, value in v.items():
                vocabulary = coded_keys.get(key)
                if vocabulary is not None:
                    v[key] = get_displaystr_from_value(vocabulary, int(value))
            return v
class TimeseriesCoreCreate(TimeseriesCoreBase):
    """Creation schema: vocabulary fields are validated against their
    display strings instead of being translated from numeric codes."""
    station_id: int
    variable_id: int
    programme_id: int = None

    @validator('sampling_frequency')
    def check_sampling_frequency(cls, v):
        if not any(entry.string == v for entry in toardb.toardb.SF_vocabulary):
            raise ValueError(f"sampling frequency not known: {v}")
        return v

    @validator('aggregation')
    def check_aggregation(cls, v):
        if not any(entry.string == v for entry in toardb.toardb.AT_vocabulary):
            raise ValueError(f"aggregation type not known: {v}")
        return v

    @validator('data_origin_type')
    def check_data_origin_type(cls, v):
        if not any(entry.string == v for entry in toardb.toardb.OT_vocabulary):
            raise ValueError(f"data origin type not known: {v}")
        return v

    @validator('data_origin')
    def check_data_origin(cls, v):
        if not any(entry.string == v for entry in toardb.toardb.DO_vocabulary):
            raise ValueError(f"data origin not known: {v}")
        return v

    # overwrite validator in creation process: keep metadata untouched
    @validator('additional_metadata')
    def check_additional_metadata(cls, v):
        return v
class TimeseriesCore(TimeseriesCoreBase):
    """Read schema for a stored timeseries core record; id is mandatory."""
    id: int

    class Config:
        orm_mode = True
122# ======== TimeseriesRole =========
class TimeseriesRoleBase(BaseModel):
    """Role of a contact for a timeseries (output schema).

    Numeric vocabulary codes for role/status are translated to display
    strings; private persons are stripped from the contact.
    """
    id: int = None
    role: str = Field(None, description="Role of contact (see controlled vocabulary: Role Code)")
    status: str = Field(None, description="Status of contact (see controlled vocabulary: Role Status)")
    contact: Contact = Field(None, description="Contact for this role")

    @validator('role')
    def check_role(cls, v):
        return [entry.display_str for entry in toardb.toardb.RC_vocabulary if entry.value == int(v)][0]

    @validator('status')
    def check_status(cls, v):
        return [entry.display_str for entry in toardb.toardb.RS_vocabulary if entry.value == int(v)][0]

    @validator('contact')
    def check_contact_is_private(cls, v):
        # do not expose persons that are flagged as private (id 0 is exempt)
        person = v.person
        if person and person.id != 0 and person.isprivate:
            v.person = None
        return v

    class Config:
        orm_mode = True
class TimeseriesRoleBaseFields(TimeseriesRoleBase):
    """Role schema that additionally exposes the raw contact_id."""
    contact_id: int = None

    class Config:
        orm_mode = True
class TimeseriesRoleCreate(TimeseriesRoleBase):
    """Creation schema for a role: role/status are validated against the
    vocabulary display strings; the contact is referenced by id."""
    contact: Contact = None
    contact_id: int

    @validator('role')
    def check_role(cls, v):
        if not any(entry.string == v for entry in toardb.toardb.RC_vocabulary):
            raise ValueError(f"role code not known: {v}")
        return v

    @validator('status')
    def check_status(cls, v):
        if not any(entry.string == v for entry in toardb.toardb.RS_vocabulary):
            raise ValueError(f"role status not known: {v}")
        return v

    class Config:
        orm_mode = True
178# do not create a role while creating a timeseries!
# do not create a role while creating a timeseries!
class TimeseriesRoleNoCreate(TimeseriesRoleCreate):
    """Reference an existing role by contact_id only (no role creation)."""
    contact_id: int
#   contact: Contact = None

    class Config:
        orm_mode = True
class TimeseriesRole(TimeseriesRoleBase):
    """Read schema for a stored role; id is mandatory."""
    id: int = Field(..., description="for internal use only")

    class Config:
        orm_mode = True
class TimeseriesRoleFields(TimeseriesRoleCreate):
    """Role schema with the vocabulary checks disabled.

    Overrides the parent validators with pass-through versions so that
    role/status values are accepted as-is.
    """
    # removed a stray `pass` statement that preceded the validators

    @validator('role')
    def check_role(cls, v):
        # override: accept the value unchecked
        return v

    @validator('status')
    def check_status(cls, v):
        # override: accept the value unchecked
        return v
205# ======== TimeseriesAnnotation =========
class TimeseriesAnnotationBase(BaseModel):
    """Annotation attached to a timeseries (output schema)."""
    id: int = None
    kind: str = Field(..., description="kind of annotation (see controlled vocabulary: Kind Of Annotation)")
    text: str = Field(..., description="text of annotation")
    date_added: dt.datetime = Field(..., description="timestamp when annotation was added")
    approved: bool = Field(..., description="Flag indicating whether the annotation of a time-series has been verified")
    contributor_id: int = Field(..., description="ID of contributor who added the annotation")

    @validator('kind')
    def check_kind(cls, v):
        # translate numeric vocabulary code -> display string
        return [entry.display_str for entry in toardb.toardb.AK_vocabulary if entry.value == int(v)][0]
class TimeseriesAnnotationPatch(BaseModel):
    """Partial-update schema for an annotation; all fields optional."""
    kind: int = None
    text: str = None
    date_added: dt.datetime = None
    approved: bool = None
    contributor_id: int = None

    @validator('kind')
    def check_kind(cls, v):
        # translate numeric vocabulary code -> display string
        return [entry.display_str for entry in toardb.toardb.AK_vocabulary if entry.value == int(v)][0]
class TimeseriesAnnotationCreate(TimeseriesAnnotationBase):
    """Creation schema: kind is validated against the vocabulary strings."""

    @validator('kind')
    def check_kind(cls, v):
        if not any(entry.string == v for entry in toardb.toardb.AK_vocabulary):
            raise ValueError(f"kind of annotation code not known: {v}")
        return v
class TimeseriesAnnotation(TimeseriesAnnotationBase):
    """Read schema for a stored annotation; id is mandatory."""
    id: int = Field(..., description="for internal use only")

    class Config:
        orm_mode = True
250# ======== TimeseriesProgramme =========
class TimeseriesProgrammeBase(BaseModel):
    """Measurement/modelling programme a timeseries belongs to."""
    id: int = None
    name: str = Field(..., description="(Short) name of programme")
    longname: str = Field(..., description="(Long) name of programme")
    homepage: str = Field(..., description="Homepage (URL) of programme")
    description: str = Field(..., description="Description of programme")
class TimeseriesProgrammeCreate(TimeseriesProgrammeBase):
    """Creation schema for a programme (identical to the base schema)."""
    pass
class TimeseriesProgramme(TimeseriesProgrammeBase):
    """Read schema for a stored programme; id is mandatory."""
    id: int = Field(..., description="for internal use only")

    class Config:
        orm_mode = True
271# ======== TimeseriesChangelog =========
class TimeseriesChangelogBase(BaseModel):
    """One changelog entry describing a change applied to a timeseries."""
    datetime: dt.datetime = Field(..., description="Date of change to the TOAR database")
    description: str = Field(..., description="Description of change")
    old_value: str = Field(..., description="value that has been changed in the TOAR database")
    new_value: str = Field(..., description="new, changed value")
    timeseries_id: int = Field(..., description="internal ID of time-series to which this change belongs")
    author_id: int = Field(..., description="internal ID of person who submitted the change to the TOAR database")
    type_of_change: int = Field(..., description="Type of change (see controlled vocabulary: Type Of Change)")
    period_start: dt.datetime = Field(None, description="for changes on data: start date from which changes were applied")
    period_end: dt.datetime = Field(None, description="for changes on data: end date to which changes were applied")
    version: str = Field(None, description="version that results from this change")

    # renamed from the misleading copy-paste name `check_role`
    @validator('type_of_change')
    def check_type_of_change(cls, v):
        # translate numeric vocabulary code -> display string
        # NOTE(review): the field is annotated int but the validator returns a
        # str; pydantic v1 does not re-validate validator output — confirm intended
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.CL_vocabulary))[0].display_str
class TimeseriesChangelog(TimeseriesChangelogBase):
    """ORM-enabled read schema for a changelog entry."""

    class Config:
        orm_mode = True
298# ======== for nested view/upload =========
class TimeseriesBase(TimeseriesCoreBase):
    """Nested view of a timeseries with its station, variable, programme,
    roles, annotations and changelog.

    Empty role/annotation/changelog lists are normalized to None; the
    changelog is presented in chronological order.
    """
    station: Stationmeta = None
    variable: Variable = None
    programme: TimeseriesProgramme = None
    additional_metadata: Json = None
    roles: List[TimeseriesRole] = None
    annotations: List[TimeseriesAnnotation] = None
    changelog: List[TimeseriesChangelog] = None

    class Config:
        orm_mode = True

    @validator('changelog')
    def order_changelog(cls, v):
        # chronological order for presentation
        return sorted(v, key=lambda entry: entry.datetime)

    @validator('roles')
    def check_roles(cls, v):
        # hide empty lists from the output
        return None if v == [] else v

    @validator('annotations')
    def check_annotations(cls, v):
        # hide empty lists from the output
        return None if v == [] else v

    @validator('changelog')
    def check_changelog(cls, v):
        # hide empty lists from the output
        return None if v == [] else v
class TimeseriesCreate(TimeseriesCoreCreate):
    """Creation schema for a timeseries with nested roles/annotations.

    Roles may only reference existing contacts (TimeseriesRoleNoCreate).
    """
    roles: List[TimeseriesRoleNoCreate] = None
    annotations: List[TimeseriesAnnotation] = None

    class Config:
        orm_mode = True
class TimeseriesPatch(TimeseriesCoreCreate):
    """Partial-update schema: every field is optional (defaults to None)."""
    label: str = None
    order: int = None
    sampling_frequency: str = None
    aggregation: str = None
    data_origin_type: str = None
    data_origin: str = None
    data_start_date: dt.datetime = None
    data_end_date: dt.datetime = None
    sampling_height: float = None
#   roles: List[TimeseriesRole] = None
#   annotations: List[TimeseriesAnnotationPatch] = None
    # just to get things working: untyped lists instead of the schemas above
    roles: list = None
    annotations: list = None
#   variable: Variable = None
#   station: StationmetaCoreBase = None
#   programme: TimeseriesProgramme = None
    station_id: int = None
    variable_id: int = None
    programme_id: int = None
    additional_metadata: Json = None

    class Config:
        orm_mode = True
class Timeseries(TimeseriesBase):
    """Timeseries read schema enriched with denormalized station metadata.

    The "hot fix" fields duplicate station characteristics directly on the
    timeseries; their numeric vocabulary codes are translated to display
    strings by the validators below.
    """
    id: int = None
    # hot fix: denormalized station metadata
    coordinates: Coordinates = None
    name: str = None
#   variable_id: int = None
#   station_id: int = None
    codes: List[str] = None
    station_country: str = None
    type: str = None
    type_of_area: str = None
    timezone: str = None
    state: str = None
    coordinate_validation_status: str = None
    mean_topography_srtm_alt_90m_year1994: float = None
    mean_topography_srtm_alt_1km_year1994: float = None
    max_topography_srtm_relative_alt_5km_year1994: float = None
    min_topography_srtm_relative_alt_5km_year1994: float = None
    stddev_topography_srtm_relative_alt_5km_year1994: float = None
    climatic_zone_year2016: str = None
    htap_region_tier1_year2010: str = None
    dominant_landcover_year2012: str = None
    landcover_description_25km_year2012: str = None
    dominant_ecoregion_year2017: str = None
    ecoregion_description_25km_year2017: str = None
    distance_to_major_road_year2020: float = None
    mean_stable_nightlights_1km_year2013: float = None
    mean_stable_nightlights_5km_year2013: float = None
    max_stable_nightlights_25km_year2013: float = None
    max_stable_nightlights_25km_year1992: float = None
    mean_population_density_250m_year2015: float = None
    mean_population_density_5km_year2015: float = None
    max_population_density_25km_year2015: float = None
    mean_population_density_250m_year1990: float = None
    mean_population_density_5km_year1990: float = None
    max_population_density_25km_year1990: float = None
    mean_nox_emissions_10km_year2015: float = None
    mean_nox_emissions_10km_year2000: float = None
    toar1_category: str = None
    toar2_category: str = None

    class Config:
        orm_mode = True

    # hot fix: translate numeric vocabulary codes to display strings
    @validator('station_country')
    def check_station_country(cls, v):
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.CN_vocabulary))[0].display_str

    @validator('coordinate_validation_status')
    def check_coordinate_validation_status(cls, v):
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.CV_vocabulary))[0].display_str

    @validator('type')
    def check_type(cls, v):
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.ST_vocabulary))[0].display_str

    @validator('type_of_area')
    def check_type_of_area(cls, v):
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.TA_vocabulary))[0].display_str

    @validator('timezone')
    def check_timezone(cls, v):
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.TZ_vocabulary))[0].display_str

    @validator('climatic_zone_year2016')
    def check_climatic_zone(cls, v):
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.CZ_vocabulary))[0].display_str

    @validator('toar1_category')
    def check_toar1_category(cls, v):
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.TC_vocabulary))[0].display_str

    @validator('toar2_category')
    def check_toar2_category(cls, v):
        # NOTE(review): uses TA_vocabulary (type of area) while toar1 uses
        # TC_vocabulary — looks like a copy-paste slip; confirm intended vocabulary
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.TA_vocabulary))[0].display_str

    @validator('htap_region_tier1_year2010')
    def check_htap_region_tier1(cls, v):
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.TR_vocabulary))[0].display_str

    @validator('dominant_landcover_year2012')
    def check_dominant_landcover_year2012(cls, v):
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.LC_vocabulary))[0].display_str

    @validator('landcover_description_25km_year2012')
    def check_landcover_description_year2012(cls, v):
        return get_full_description_from_abbreviation(toardb.toardb.LC_vocabulary, v)

    @validator('ecoregion_description_25km_year2017')
    def check_ecoregion_description_25km_year2017(cls, v):
        return get_full_description_from_abbreviation(toardb.toardb.ER_vocabulary, v)

    @validator('dominant_ecoregion_year2017')
    def check_dominant_ecoregion_year2017(cls, v):
        return tuple(filter(lambda x: x.value == int(v), toardb.toardb.ER_vocabulary))[0].display_str
class TimeseriesFields(Timeseries):
    """Timeseries schema exposing role contact ids.

    NOTE(review): overrides `roles` (List[TimeseriesRole] in Timeseries)
    with a single TimeseriesRoleBaseFields — confirm this narrowing is
    intended by the callers.
    """
    # removed a redundant trailing `pass` statement
    roles: TimeseriesRoleBaseFields = None
class TimeseriesWithCitation(TimeseriesBase):
    """Timeseries view with citation/license information.

    The vocabulary validators accept either the display string itself or a
    numeric code (which gets translated); anything else raises ValueError.
    """
    id: int
    citation: str
    attribution: str = None
    license: str

    class Config:
        orm_mode = True

    @validator('sampling_frequency')
    def check_sampling_frequency(cls, v):
        vocabulary = toardb.toardb.SF_vocabulary
        if any(entry.string == v for entry in vocabulary):
            return v
        translated = [entry.display_str for entry in vocabulary if entry.value == int(v)]
        if translated:
            return translated[0]
        raise ValueError(f"sampling frequency not known: {v}")

    @validator('aggregation')
    def check_aggregation(cls, v):
        vocabulary = toardb.toardb.AT_vocabulary
        if any(entry.string == v for entry in vocabulary):
            return v
        translated = [entry.display_str for entry in vocabulary if entry.value == int(v)]
        if translated:
            return translated[0]
        raise ValueError(f"aggregation type not known: {v}")

    @validator('data_origin_type')
    def check_data_origin_type(cls, v):
        vocabulary = toardb.toardb.OT_vocabulary
        if any(entry.string == v for entry in vocabulary):
            return v
        translated = [entry.display_str for entry in vocabulary if entry.value == int(v)]
        if translated:
            return translated[0]
        raise ValueError(f"data origin type not known: {v}")

    @validator('data_origin')
    def check_data_origin(cls, v):
        vocabulary = toardb.toardb.DO_vocabulary
        if any(entry.string == v for entry in vocabulary):
            return v
        translated = [entry.display_str for entry in vocabulary if entry.value == int(v)]
        if translated:
            return translated[0]
        raise ValueError(f"data origin not known: {v}")

    @validator('additional_metadata')
    def check_additional_metadata(cls, v):
        # keep the metadata untouched for this view
        return v
# union of the schema types that may appear as a timeseries "contributor"
Contributors = Union[TimeseriesProgramme, TimeseriesRole]