Coverage for toardb/timeseries/schemas.py: 91%

367 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-11-03 20:32 +0000

1# SPDX-FileCopyrightText: 2021 Forschungszentrum Jülich GmbH 

2# SPDX-License-Identifier: MIT 

3 

4""" 

5Pydantic schemas for TOAR database 

6 

7""" 

8 

9from typing import List, Any, Union 

10 

11from pydantic import BaseModel, Json, validator, Field 

12import datetime as dt 

13from toardb.contacts.schemas import Contact 

14from toardb.variables.schemas import Variable 

15from toardb.stationmeta.schemas import Stationmeta, Coordinates, get_full_description_from_abbreviation 

16from toardb.utils.utils import get_displaystr_from_value 

17import toardb 

18 

19# ======== Timeseries ========= 

20 

# Core attributes shared by the timeseries schemas. Every field defaults to
# None; the validators below translate numeric vocabulary values into the
# display strings of the corresponding toardb.toardb vocabulary.
class TimeseriesCoreBase(BaseModel):
    id: int = None
    label: str = Field(
        None,
        description="a short string to distinguish this timeseries from others with the same combination of station and variable",
    )
    order: int = Field(
        None,
        description="indicates position of this timeseries in a list when several timeseries share the same station and variable combination",
    )
    sampling_frequency: str = Field(
        None,
        description="Sampling frequency of data in this timeseries (see controlled vocabulary: Sampling Frequency)",
    )
    aggregation: str = Field(
        None,
        description="Aggregation type in this timeseries (see controlled vocabulary: Aggregation Type)",
    )
    data_start_date: dt.datetime = Field(
        None,
        description="Start date of the variable data available for this station",
    )
    data_end_date: dt.datetime = Field(
        None,
        description="End date of the variable data available for this station",
    )
    data_origin: str = Field(
        None,
        description="origin of data (model name or instrument) (see controlled vocabulary: Data Origin)",
    )
    data_origin_type: str = Field(
        None,
        description="type of data origin (see controlled vocabulary: Data Origin Type)",
    )
    provider_version: str = Field(None, description="provider data version")
    sampling_height: float = Field(
        None,
        description="Height above the ground of the inlet/instrument/sampler (in m)",
    )
    additional_metadata: Json = Field(
        None,
        description="Additional information about the timeseries as JSON structure.",
    )
    # the following fields are for internal use only
    data_license_accepted: dt.datetime = Field(
        None,
        description="date when provider accepted the data license agreement",
    )
    dataset_approved_by_provider: dt.datetime = Field(
        None,
        description="date when provider approved the data",
    )
    doi: str = Field(None, description="DOI of the data")
    coverage: float = Field(
        None,
        description="data coverage between the time series data_start_date and data_end_date",
    )

    # Not modelled yet:
    #   - "Score values from automated data QA (5-star evaluation)"
    #   - "detailed report of timeseries QA/QC evaluation"
    #   - "arbitrary additional information about this timeseries"

    @validator('sampling_frequency')
    def check_sampling_frequency(cls, v):
        """Return the SF_vocabulary display string for numeric value v.

        Returns None for a falsy v; raises IndexError if no entry matches.
        """
        if not v:
            return None
        hits = [entry for entry in toardb.toardb.SF_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('aggregation')
    def check_aggregation(cls, v):
        """Return the AT_vocabulary display string for numeric value v.

        Returns None for a falsy v; raises IndexError if no entry matches.
        """
        if not v:
            return None
        hits = [entry for entry in toardb.toardb.AT_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('data_origin_type')
    def check_data_origin_type(cls, v):
        """Return the OT_vocabulary display string for numeric value v.

        Returns None for a falsy v; raises IndexError if no entry matches.
        """
        if not v:
            return None
        hits = [entry for entry in toardb.toardb.OT_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('data_origin')
    def check_data_origin(cls, v):
        """Return the DO_vocabulary display string for numeric value v.

        Returns None for a falsy v; raises IndexError if no entry matches.
        """
        if not v:
            return None
        hits = [entry for entry in toardb.toardb.DO_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('additional_metadata')
    def check_additional_metadata(cls, v):
        """Replace coded metadata entries by their display strings.

        The values stored under 'sampling_type', 'absorption_cross_section'
        and 'calibration_type' are converted in place; v itself is returned.
        """
        if not v:
            return v
        for key, value in v.items():
            # pick the vocabulary that belongs to this key; other keys stay untouched
            if key == 'sampling_type':
                vocabulary = toardb.toardb.KS_vocabulary
            elif key == 'absorption_cross_section':
                vocabulary = toardb.toardb.CS_vocabulary
            elif key == 'calibration_type':
                vocabulary = toardb.toardb.CT_vocabulary
            else:
                continue
            # assigning to an existing key does not resize the dict, so this is
            # safe while iterating over v.items()
            v[key] = get_displaystr_from_value(vocabulary, int(value))
        return v

75 

76 

# Schema for creating a timeseries: station_id and variable_id are required,
# and the vocabulary fields must already be given as known vocabulary strings.
# The validators reuse the base-class method names on purpose so that they
# replace the inherited (value -> display string) validators.
class TimeseriesCoreCreate(TimeseriesCoreBase):
    station_id: int
    variable_id: int
    programme_id: int = None

    @validator('sampling_frequency')
    def check_sampling_frequency(cls, v):
        """Return v if it is the string of an SF_vocabulary entry, else raise ValueError."""
        known = [entry for entry in toardb.toardb.SF_vocabulary if entry.string == v]
        if not known:
            raise ValueError(f"sampling frequency not known: {v}")
        return v

    @validator('aggregation')
    def check_aggregation(cls, v):
        """Return v if it is the string of an AT_vocabulary entry, else raise ValueError."""
        known = [entry for entry in toardb.toardb.AT_vocabulary if entry.string == v]
        if not known:
            raise ValueError(f"aggregation type not known: {v}")
        return v

    @validator('data_origin_type')
    def check_data_origin_type(cls, v):
        """Return v if it is the string of an OT_vocabulary entry, else raise ValueError."""
        known = [entry for entry in toardb.toardb.OT_vocabulary if entry.string == v]
        if not known:
            raise ValueError(f"data origin type not known: {v}")
        return v

    @validator('data_origin')
    def check_data_origin(cls, v):
        """Return v if it is the string of a DO_vocabulary entry, else raise ValueError."""
        known = [entry for entry in toardb.toardb.DO_vocabulary if entry.string == v]
        if not known:
            raise ValueError(f"data origin not known: {v}")
        return v

    @validator('additional_metadata')
    def check_additional_metadata(cls, v):
        """Pass the metadata through unchanged (replaces the inherited validator on creation)."""
        return v

114 

115 

# Variant of TimeseriesCoreBase in which the id is a required field.
class TimeseriesCore(TimeseriesCoreBase):
    id: int

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

121 

122# ======== TimeseriesRole ========= 

123 

# Role of a contact for a timeseries. Role and status are translated from
# numeric vocabulary values to display strings; private persons are removed
# from the contact.
class TimeseriesRoleBase(BaseModel):
    id: int = None
    role: str = Field(
        None,
        description="Role of contact (see controlled vocabulary: Role Code)",
    )
    status: str = Field(
        None,
        description="Status of contact (see controlled vocabulary: Role Status)",
    )
    contact: Contact = Field(None, description="Contact for this role")

    @validator('role')
    def check_role(cls, v):
        """Return the RC_vocabulary display string for numeric value v (IndexError if unknown)."""
        hits = [entry for entry in toardb.toardb.RC_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('status')
    def check_status(cls, v):
        """Return the RS_vocabulary display string for numeric value v (IndexError if unknown)."""
        hits = [entry for entry in toardb.toardb.RS_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('contact')
    def check_contact_is_private(cls, v):
        """Drop the person from the contact when that person is flagged as private.

        Persons with id 0 are left untouched. The contact is modified in place
        and returned.
        """
        person = v.person
        if person and person.id != 0 and person.isprivate:
            v.person = None
        return v

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

148 

149 

# TimeseriesRoleBase extended by an optional contact_id.
class TimeseriesRoleBaseFields(TimeseriesRoleBase):
    contact_id: int = None

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

155 

156 

# Schema for creating a role: contact_id is required and role/status must be
# given as known vocabulary strings. The validators reuse the base-class
# method names so that they replace the inherited validators.
class TimeseriesRoleCreate(TimeseriesRoleBase):
    contact: Contact = None
    contact_id: int

    @validator('role')
    def check_role(cls, v):
        """Return v if it is the string of an RC_vocabulary entry, else raise ValueError."""
        known = [entry for entry in toardb.toardb.RC_vocabulary if entry.string == v]
        if not known:
            raise ValueError(f"role code not known: {v}")
        return v

    @validator('status')
    def check_status(cls, v):
        """Return v if it is the string of an RS_vocabulary entry, else raise ValueError."""
        known = [entry for entry in toardb.toardb.RS_vocabulary if entry.string == v]
        if not known:
            raise ValueError(f"role status not known: {v}")
        return v

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

177 

178# do not create a role while creating a timeseries! 

class TimeseriesRoleNoCreate(TimeseriesRoleCreate):
    # contact_id is declared again as a required field; the inherited
    # 'contact' field is not overridden here
    contact_id: int

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

185 

186 

# TimeseriesRoleBase with a required id.
class TimeseriesRole(TimeseriesRoleBase):
    id: int = Field(
        ...,
        description="for internal use only",
    )

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

192 

# Variant of TimeseriesRoleCreate whose role/status validators accept any
# value: reusing the method names replaces the inherited vocabulary checks.
class TimeseriesRoleFields(TimeseriesRoleCreate):

    @validator('role')
    def check_role(cls, v):
        """Return the role unchanged."""
        return v

    @validator('status')
    def check_status(cls, v):
        """Return the status unchanged."""
        return v

203 

204 

205# ======== TimeseriesAnnotation ========= 

206 

# Annotation attached to a timeseries; all fields except id are required.
class TimeseriesAnnotationBase(BaseModel):
    id: int = None
    kind: str = Field(
        ...,
        description="kind of annotation (see controlled vocabulary: Kind Of Annotation)",
    )
    text: str = Field(..., description="text of annotation")
    date_added: dt.datetime = Field(
        ...,
        description="timestamp when annotation was added",
    )
    approved: bool = Field(
        ...,
        description="Flag indicating whether the annotation of a time-series has been verified",
    )
    contributor_id: int = Field(
        ...,
        description="ID of contributor who added the annotation",
    )

    @validator('kind')
    def check_kind(cls, v):
        """Return the AK_vocabulary display string for numeric value v (IndexError if unknown)."""
        hits = [entry for entry in toardb.toardb.AK_vocabulary if entry.value == int(v)]
        return hits[0].display_str

218 

219 

# Patch schema for an annotation: every field is optional.
class TimeseriesAnnotationPatch(BaseModel):
    kind: int = None
    text: str = None
    date_added: dt.datetime = None
    approved: bool = None
    contributor_id: int = None

    @validator('kind')
    def check_kind(cls, v):
        """Return the AK_vocabulary display string for numeric value v (IndexError if unknown)."""
        hits = [entry for entry in toardb.toardb.AK_vocabulary if entry.value == int(v)]
        return hits[0].display_str

230 

231 

# Schema for creating an annotation: 'kind' must be a known vocabulary string.
# The validator reuses the base-class method name to replace the inherited one.
class TimeseriesAnnotationCreate(TimeseriesAnnotationBase):

    @validator('kind')
    def check_kind(cls, v):
        """Return v if it is the string of an AK_vocabulary entry, else raise ValueError."""
        known = [entry for entry in toardb.toardb.AK_vocabulary if entry.string == v]
        if not known:
            raise ValueError(f"kind of annotation code not known: {v}")
        return v

240 

241 

242 

# TimeseriesAnnotationBase with a required id.
class TimeseriesAnnotation(TimeseriesAnnotationBase):
    id: int = Field(
        ...,
        description="for internal use only",
    )

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

248 

249 

250# ======== TimeseriesProgramme ========= 

251 

# Programme a timeseries belongs to; all fields except id are required.
class TimeseriesProgrammeBase(BaseModel):
    id: int = None
    name: str = Field(
        ...,
        description="(Short) name of programme",
    )
    longname: str = Field(
        ...,
        description="(Long) name of programme",
    )
    homepage: str = Field(
        ...,
        description="Homepage (URL) of programme",
    )
    description: str = Field(
        ...,
        description="Description of programme",
    )

258 

259 

# Creation schema for a programme; adds nothing to TimeseriesProgrammeBase.
class TimeseriesProgrammeCreate(TimeseriesProgrammeBase):
    pass

262 

263 

# TimeseriesProgrammeBase with a required id.
class TimeseriesProgramme(TimeseriesProgrammeBase):
    id: int = Field(
        ...,
        description="for internal use only",
    )

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

269 

270 

271# ======== TimeseriesChangelog ========= 

272 

# One changelog entry of a timeseries.
class TimeseriesChangelogBase(BaseModel):
    datetime: dt.datetime = Field(
        ...,
        description="Date of change to the TOAR database",
    )
    description: str = Field(..., description="Description of change")
    old_value: str = Field(
        ...,
        description="value that has been changed in the TOAR database",
    )
    new_value: str = Field(..., description="new, changed value")
    timeseries_id: int = Field(
        ...,
        description="internal ID of time-series to which this change belongs",
    )
    author_id: int = Field(
        ...,
        description="internal ID of person who submitted the change to the TOAR database",
    )
    type_of_change: int = Field(
        ...,
        description="Type of change (see controlled vocabulary: Type Of Change)",
    )
    period_start: dt.datetime = Field(
        None,
        description="for changes on data: start date from which changes were applied",
    )
    period_end: dt.datetime = Field(
        None,
        description="for changes on data: end date to which changes were applied",
    )
    version: str = Field(
        None,
        description="version that results from this change",
    )

    # NOTE: despite its name this validator handles 'type_of_change'; the
    # method name is kept as-is because it is part of the class namespace.
    @validator('type_of_change')
    def check_role(cls, v):
        """Return the CL_vocabulary display string for numeric value v (IndexError if unknown)."""
        hits = [entry for entry in toardb.toardb.CL_vocabulary if entry.value == int(v)]
        return hits[0].display_str

289 

290 

# TimeseriesChangelogBase that can be populated from ORM objects.
class TimeseriesChangelog(TimeseriesChangelogBase):

    class Config:
        orm_mode = True

295 

296 

297 

298# ======== for nested view/upload ========= 

299 

# Nested view of a timeseries: the core attributes plus the related station,
# variable, programme, roles, annotations and changelog objects.
class TimeseriesBase(TimeseriesCoreBase):
    # Stationmeta is the station schema in use here (other station schemas
    # were listed as candidates in earlier, disabled declarations)
    station: Stationmeta = None
    variable: Variable = None
    programme: TimeseriesProgramme = None
    additional_metadata: Json = None
    roles: List[TimeseriesRole] = None
    annotations: List[TimeseriesAnnotation] = None
    changelog: List[TimeseriesChangelog] = None

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

    @validator('changelog')
    def order_changelog(cls, v):
        """Return the changelog entries sorted by their datetime (ascending)."""
        return sorted(v, key=lambda entry: entry.datetime)

    @validator('roles')
    def check_roles(cls, v):
        """Turn an empty list of roles into None; pass anything else through."""
        return None if v == [] else v

    @validator('annotations')
    def check_annotations(cls, v):
        """Turn an empty list of annotations into None; pass anything else through."""
        return None if v == [] else v

    @validator('changelog')
    def check_changelog(cls, v):
        """Turn an empty changelog into None; pass anything else through."""
        return None if v == [] else v

343 

344 

# Creation schema for a timeseries including its roles and annotations.
class TimeseriesCreate(TimeseriesCoreCreate):
    roles: List[TimeseriesRoleNoCreate] = None
    annotations: List[TimeseriesAnnotation] = None

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

351 

352 

# Patch schema for a timeseries: every field, including the ids that are
# required in TimeseriesCoreCreate, is optional here.
class TimeseriesPatch(TimeseriesCoreCreate):
    label: str = None
    order: int = None
    sampling_frequency: str = None
    aggregation: str = None
    data_origin_type: str = None
    data_origin: str = None
    data_start_date: dt.datetime = None
    data_end_date: dt.datetime = None
    sampling_height: float = None
    # roles and annotations are plain, untyped lists "just to get things
    # working"; the typed declarations (List[TimeseriesRole],
    # List[TimeseriesAnnotationPatch]) are disabled
    roles: list = None
    annotations: list = None
    # related objects are referenced by id only; nested variable / station /
    # programme fields are disabled
    station_id: int = None
    variable_id: int = None
    programme_id: int = None
    additional_metadata: Json = None

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

378 

379 

# Full timeseries view. In addition to the nested TimeseriesBase attributes it
# carries a flat set of station attributes (marked as a hot fix in the
# original); the coded ones are translated to display strings by the
# validators below.
class Timeseries(TimeseriesBase):
    id: int = None

    # --- hot fix: station attributes at timeseries level ---
    coordinates: Coordinates = None
    name: str = None
    codes: List[str] = None
    station_country: str = None
    type: str = None
    type_of_area: str = None
    timezone: str = None
    state: str = None
    coordinate_validation_status: str = None
    mean_topography_srtm_alt_90m_year1994: float = None
    mean_topography_srtm_alt_1km_year1994: float = None
    max_topography_srtm_relative_alt_5km_year1994: float = None
    min_topography_srtm_relative_alt_5km_year1994: float = None
    stddev_topography_srtm_relative_alt_5km_year1994: float = None
    climatic_zone_year2016: str = None
    htap_region_tier1_year2010: str = None
    dominant_landcover_year2012: str = None
    landcover_description_25km_year2012: str = None
    dominant_ecoregion_year2017: str = None
    ecoregion_description_25km_year2017: str = None
    distance_to_major_road_year2020: float = None
    mean_stable_nightlights_1km_year2013: float = None
    mean_stable_nightlights_5km_year2013: float = None
    max_stable_nightlights_25km_year2013: float = None
    max_stable_nightlights_25km_year1992: float = None
    mean_population_density_250m_year2015: float = None
    mean_population_density_5km_year2015: float = None
    max_population_density_25km_year2015: float = None
    mean_population_density_250m_year1990: float = None
    mean_population_density_5km_year1990: float = None
    max_population_density_25km_year1990: float = None
    mean_nox_emissions_10km_year2015: float = None
    mean_nox_emissions_10km_year2000: float = None
    toar1_category: str = None
    toar2_category: str = None

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

    # --- hot fix: validators for the station attributes ---
    # Each lookup takes the first vocabulary entry whose value equals int(v)
    # and raises IndexError when there is none.

    @validator('station_country')
    def check_station_country(cls, v):
        """Return the CN_vocabulary display string for numeric value v."""
        hits = [entry for entry in toardb.toardb.CN_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('coordinate_validation_status')
    def check_coordinate_validation_status(cls, v):
        """Return the CV_vocabulary display string for numeric value v."""
        hits = [entry for entry in toardb.toardb.CV_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('type')
    def check_type(cls, v):
        """Return the ST_vocabulary display string for numeric value v."""
        hits = [entry for entry in toardb.toardb.ST_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('type_of_area')
    def check_type_of_area(cls, v):
        """Return the TA_vocabulary display string for numeric value v."""
        hits = [entry for entry in toardb.toardb.TA_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('timezone')
    def check_timezone(cls, v):
        """Return the TZ_vocabulary display string for numeric value v."""
        hits = [entry for entry in toardb.toardb.TZ_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('climatic_zone_year2016')
    def check_climatic_zone(cls, v):
        """Return the CZ_vocabulary display string for numeric value v."""
        hits = [entry for entry in toardb.toardb.CZ_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('toar1_category')
    def check_toar1_category(cls, v):
        """Return the TC_vocabulary display string for numeric value v."""
        hits = [entry for entry in toardb.toardb.TC_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('toar2_category')
    def check_toar2_category(cls, v):
        """Return the TA_vocabulary display string for numeric value v."""
        # NOTE(review): this uses TA_vocabulary, the same vocabulary as
        # 'type_of_area' -- confirm that this is the intended vocabulary.
        hits = [entry for entry in toardb.toardb.TA_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('htap_region_tier1_year2010')
    def check_htap_region_tier1(cls, v):
        """Return the TR_vocabulary display string for numeric value v."""
        hits = [entry for entry in toardb.toardb.TR_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('dominant_landcover_year2012')
    def check_dominant_landcover_year2012(cls, v):
        """Return the LC_vocabulary display string for numeric value v."""
        hits = [entry for entry in toardb.toardb.LC_vocabulary if entry.value == int(v)]
        return hits[0].display_str

    @validator('landcover_description_25km_year2012')
    def check_landcover_description_year2012(cls, v):
        """Expand v via get_full_description_from_abbreviation using LC_vocabulary."""
        landcover_vocabulary = toardb.toardb.LC_vocabulary
        return get_full_description_from_abbreviation(landcover_vocabulary, v)

    @validator('ecoregion_description_25km_year2017')
    def check_ecoregion_description_25km_year2017(cls, v):
        """Expand v via get_full_description_from_abbreviation using ER_vocabulary."""
        ecoregion_vocabulary = toardb.toardb.ER_vocabulary
        return get_full_description_from_abbreviation(ecoregion_vocabulary, v)

    @validator('dominant_ecoregion_year2017')
    def check_dominant_ecoregion_year2017(cls, v):
        """Return the ER_vocabulary display string for numeric value v."""
        hits = [entry for entry in toardb.toardb.ER_vocabulary if entry.value == int(v)]
        return hits[0].display_str

476 

# Timeseries variant whose 'roles' field is a single TimeseriesRoleBaseFields
# object instead of a list of roles.
class TimeseriesFields(Timeseries):
    roles: TimeseriesRoleBaseFields = None

480 

481 

def _known_string_or_display_str(v, vocabulary, what):
    """Resolve v against a controlled vocabulary.

    Args:
        v: value to check, either the ``string`` of a vocabulary entry or
            something convertible to the integer ``value`` of an entry.
        vocabulary: iterable of entries with ``string``, ``value`` and
            ``display_str`` attributes.
        what: human-readable name of the vocabulary, used in the error message.

    Returns:
        v unchanged when it equals the ``string`` of an entry; otherwise the
        ``display_str`` of the first entry whose ``value`` equals ``int(v)``.

    Raises:
        ValueError: "<what> not known: <v>" when neither lookup succeeds.
    """
    if any(entry.string == v for entry in vocabulary):
        return v
    # A value that is not numeric at all simply cannot match by code. Guard the
    # conversion so that the caller gets the descriptive "not known" error
    # instead of the generic "invalid literal for int()" message.
    try:
        code = int(v)
    except (TypeError, ValueError):
        code = None
    if code is not None:
        for entry in vocabulary:
            if entry.value == code:
                return entry.display_str
    raise ValueError(f"{what} not known: {v}")


# Nested timeseries view extended by citation, attribution and license.
# The vocabulary validators accept both representations of a value: the
# vocabulary string (returned unchanged) and the numeric value (translated to
# its display string). They reuse the base-class method names so that they
# replace the inherited validators.
class TimeseriesWithCitation(TimeseriesBase):
    id: int
    citation: str
    attribution: str = None
    license: str

    class Config:
        # allow the model to be populated from ORM objects
        orm_mode = True

    @validator('sampling_frequency')
    def check_sampling_frequency(cls, v):
        """Validate v against SF_vocabulary; raises ValueError if unknown."""
        return _known_string_or_display_str(
            v, toardb.toardb.SF_vocabulary, "sampling frequency"
        )

    @validator('aggregation')
    def check_aggregation(cls, v):
        """Validate v against AT_vocabulary; raises ValueError if unknown."""
        return _known_string_or_display_str(
            v, toardb.toardb.AT_vocabulary, "aggregation type"
        )

    @validator('data_origin_type')
    def check_data_origin_type(cls, v):
        """Validate v against OT_vocabulary; raises ValueError if unknown."""
        return _known_string_or_display_str(
            v, toardb.toardb.OT_vocabulary, "data origin type"
        )

    @validator('data_origin')
    def check_data_origin(cls, v):
        """Validate v against DO_vocabulary; raises ValueError if unknown."""
        return _known_string_or_display_str(
            v, toardb.toardb.DO_vocabulary, "data origin"
        )

    @validator('additional_metadata')
    def check_additional_metadata(cls, v):
        """Pass the metadata through unchanged (replaces the inherited validator)."""
        return v

531 

532 

# Type alias: a contributor is either a programme or a role.
Contributors = Union[
    TimeseriesProgramme,
    TimeseriesRole,
]