1 module dhdf5.dataset;
2 
3 private
4 {
5 	import std.traits : isDynamicArray;
6 	import std.range : isInputRange;
7 	import dhdf5.dataspec : DataSpecification;
8 
9 	/// array is dynamic array whose dimensions sizes are set using dim array elements
10 	/// as sizes.
11 	auto setDynArrayDimensions(T)(ref T arr, const size_t[] dim) if(isDynamicArray!T)
12 	{
13 		auto setDimImpl(T)(ref T t, const size_t[] dim)
14 		{
15 			static if(isDynamicArray!T)
16 			{
17 				static if(isDynamicArray!(typeof(t[0])))
18 				{
19 					t.length = dim[0];
20 					foreach(ref e; t[0..$])
21 					{
22 						e.length = dim[1];
23 					}
24 
25 					return setDimImpl(t[0], dim[1..$]);
26 				}
27 				else
28 				{
29 					t.length = dim[0];
30 				}
31 			}
32 
33 		}
34 
35 		return setDimImpl(arr, dim);
36 	}
37 
38 	unittest
39 	{
40 		size_t[] dim = [1, 3, 5];
41 		int[][][] res;
42 		setDynArrayDimensions(res, dim);
43 		assert(res.length == 1);
44 		assert(res[0].length == 3);
45 		assert(res[0][0].length == 5);
46 
47 		int[] res2;
48 		setDynArrayDimensions(res2, dim);
49 		assert(res2.length == 1);
50 	}
51 
52 	template rankOf(R)
53 		if (isInputRange!R)
54 	{
55 		auto rankOfImpl(Range)()
56 		{
57 			import std.range : ElementType;
58 
59 			static if (isInputRange!Range)
60 			{
61 				return 1 + rankOfImpl!(ElementType!Range);
62 			}
63 			else
64 				return 0;
65 		}
66 
67 		enum rankOf = rankOfImpl!R;
68 	}
69 
70 	unittest
71 	{
72 		{
73 			enum rank = rankOf!(uint[]);
74 			static assert (rank == 1);
75 		}
76 
77 		{
78 			enum rank = rankOf!(uint[][][][][][]);
79 			static assert (rank == 6);
80 		}
81 
82 		{
83 			import std.range : only;
84 
85 			auto range = only(1, 2);
86 			alias Range = typeof(range);
87 
88 			enum rank = rankOf!Range;
89 			static assert (rank == 1);
90 		}
91 
92 		{
93 			import std.range : only;
94 
95 			auto range = only(1, 2);
96 			alias Range = typeof(range);
97 
98 			auto ror = only(range, range);
99 			alias RoR = typeof(ror);
100 
101 			enum rank = rankOf!RoR;
102 			static assert (rank == 2);
103 		}
104 	}
105 
106 	struct Dataspace
107 	{
108 		import hdf5.hdf5 : hid_t, H5Dget_space, H5Sclose;
109 
110 		hid_t hid = -1;
111 
112 		this(hid_t dataset)
113 		{
114 			hid = H5Dget_space (dataset);
115 			assert(hid >= 0);
116 		}
117 
118 		~this()
119 		{
120 			if (hid != -1)
121 			{
122 				H5Sclose (hid);
123 				hid = -1;
124 			}
125 		}
126 
127 		alias hid this;
128 	}
129 }
130 
131 struct Dataset(Data, DataSpecType = typeof(DataSpecification!Data.make()))
132 	if (isInputRange!Data)
133 {
134 	import std.range : ElementType, hasLength;
135 	import std.traits : isImplicitlyConvertible;
136 	import std.typecons : RefCounted;
137 	import hdf5.hdf5 : hid_t, hsize_t, H5Sget_simple_extent_ndims, H5Dclose;
138 	import dhdf5.file : H5File;
139 
140 	static assert (isInputRange!Data, Data.stringof ~ " should be input range");
141 
142 	enum rank = rankOf!Data;
143 	alias Type = RefCounted!(Dataset!(Data, DataSpecType));
144 
145 	public
146 	{
147 		this(hid_t dataset, DataSpecType data_spec)
148 		{
149 			import std.exception : enforce;
150 			enforce (dataset != -1);
151 			_dataset = dataset;
152 			_data_spec = data_spec;
153 
154 			debug
155 			{
156 				import hdf5.hdf5 : H5Sget_simple_extent_ndims;
157 
158 				auto space_id = Dataspace (_dataset);
159 				assert (rank == H5Sget_simple_extent_ndims (space_id));
160 			}
161 		}
162 
163 		~this()
164 		{
165 			import hdf5.hdf5 : H5Dclose;
166 
167 			if (_dataset != -1)
168 			{
169 				H5Dclose(_dataset);
170 				_dataset = -1;
171 			}
172 		}
173 	}
174 
175 	static create(ref const(H5File) file, string name)
176 	{
177 		import std..string: toStringz;
178 		import hdf5.hdf5 : H5P_DEFAULT, H5P_DATASET_CREATE, H5Pcreate, H5Pclose,
179 			H5Pset_chunk, H5Screate_simple, H5Dcreate2;
180 		import dhdf5.dataspec : countDimensions;
181 		import std.conv: castFrom;
182 
183 		enum DEFAULT_CHUNK_SIZE = 512;
184 		auto dcpl_id = H5P_DEFAULT;
185 
186 		/* Create a dataset creation property list and set it to use chunking
187 		 */
188 		auto max_dim = countDimensions!(Data)();
189 		auto curr_dim = max_dim.dup;
190 		curr_dim[] = 0;
191 		hsize_t[] chunk_dims;
192 		chunk_dims.length = curr_dim.length;
193 		chunk_dims[] = DEFAULT_CHUNK_SIZE;
194 		dcpl_id = H5Pcreate (H5P_DATASET_CREATE);
195 		scope(exit) H5Pclose (dcpl_id);
196 
197 		H5Pset_chunk (dcpl_id, castFrom!size_t.to!int(chunk_dims.length), chunk_dims.ptr);
198 
199 		auto space = H5Screate_simple (castFrom!(size_t).to!int(curr_dim.length), curr_dim.ptr, max_dim.ptr);
200 		auto data_spec = DataSpecType.make();
201 		auto dataset = H5Dcreate2 (file.tid, name.toStringz, data_spec.tid, space, H5P_DEFAULT, dcpl_id, H5P_DEFAULT);
202 		assert(dataset >= 0);
203 		return RefCounted!(Dataset!(Data, DataSpecType))(dataset, data_spec);
204 	}
205 
206 	static open(ref const(H5File) file, string name)
207 	{
208 		import std..string: toStringz;
209 		import hdf5.hdf5 : H5P_DEFAULT, H5Dopen2;
210 
211 		auto data_spec = DataSpecType.make();
212 		auto dataset = H5Dopen2 (file.tid, name.toStringz, H5P_DEFAULT);
213 		assert(dataset >= 0);
214 		return RefCounted!(Dataset!(Data, DataSpecType))(dataset, data_spec);
215 	}
216 
217 	/**
218 	 * Return current shape, that can change during programm running.
219 	 */
220 	auto currShape() const
221 	{
222 		import hdf5.hdf5 : H5Sget_simple_extent_dims;
223 
224 		if (_dataset == -1)
225 			return typeof(_curr_shape).init;
226 
227 		auto space_id  = Dataspace (_dataset);
228 
229 		H5Sget_simple_extent_dims (space_id, _curr_shape.ptr, _max_shape.ptr);
230 
231 		return _curr_shape;
232 	}
233 
234 	auto currShape(hsize_t[] extent)
235 	{
236 		import hdf5.hdf5;
237 
238 		assert (_dataset != -1);
239 		auto status = H5Dset_extent (_dataset, extent.ptr);
240 		assert(status >= 0);
241 	}
242 
243 	/**
244 	 * Return maximal shape, that doesn't change during program running.
245 	 */
246 	auto maxShape() const pure
247 	{
248 		return _max_shape;
249 	}
250 
251 	/*
252 	 * Read data from the dataset.
253 	 */
254 	auto read() const
255 	{
256 		import hdf5.hdf5;
257 
258 		ElementType!Data[] data;
259 		auto filespace = Dataspace(_dataset);
260 
261 		// get current size
262 		auto offset = currShape().dup;
263 		// set offset to zero for all dimensions
264 		offset[] = 0;
265 		herr_t status;
266 
267 		setDynArrayDimensions(data, currShape());
268 		/*
269 		 * Define the memory space to read dataset.
270 		 */
271 		auto memspace = H5Screate_simple(rank, currShape().ptr, null);
272 		scope(exit) H5Sclose(memspace);
273 
274 		/*
275 		 * Read dataset
276 		 */
277 		status = H5Dread(_dataset, _data_spec.tid, memspace, filespace,
278 				 H5P_DEFAULT, data.ptr);
279 		assert(status >= 0);
280 
281 		return data;
282 	}
283 
284 	/**
285 	 * Read count data from file starting with offset
286 	 */
287 	auto read(hsize_t[] offset, hsize_t[] count) const
288 	{
289 		import hdf5.hdf5 : H5Sselect_hyperslab, H5Screate_simple, H5Dread, H5Sclose,
290 			H5S_seloper_t, H5P_DEFAULT;
291 
292 		//assert((offset+count) <= _max_shape[0]);
293 		/*
294 		* get the file dataspace.
295 		*/
296 		auto dataspace = Dataspace (_dataset); // dataspace identifier
297 
298 		auto status = H5Sselect_hyperslab (dataspace, H5S_seloper_t.H5S_SELECT_SET, offset.ptr, null, count.ptr, null);
299 		assert(status >= 0);
300 		/*
301 		* Define memory dataspace.
302 		*/
303 		auto mem_offset = offset;
304 		mem_offset[] = 0;
305 		auto mem_count  = count;
306 		auto dimsm = mem_count;
307 		auto memspace = H5Screate_simple (rank, dimsm.ptr, null);
308 		scope(exit) H5Sclose (memspace);
309 
310 		/*
311 		* Define memory hyperslab.
312 		*/
313 		status = H5Sselect_hyperslab (memspace, H5S_seloper_t.H5S_SELECT_SET, mem_offset.ptr, null, mem_count.ptr, null);
314 		assert(status >=0);
315 
316 		ElementType!Data[] data;
317 		setDynArrayDimensions (data, count);
318 
319 		status = H5Dread (_dataset, _data_spec.tid, memspace, dataspace, H5P_DEFAULT, data.ptr);
320 		assert(status >= 0);
321 		return data;
322 	}
323 
324 	void write(Range, Args...)(Range range, Args args)
325 		if (isImplicitlyConvertible!(ElementType!Range, ElementType!Data) &&
326 			!is(Range == ElementType!(Data)[]))
327 	{
328 		import std.container : Array;
329 
330 		auto buffer = Array!(ElementType!Data)(range);
331 
332 		write((&buffer[0])[0..buffer.length], args);
333 	}
334 
335 	/*
336 	 * Write data to the dataset
337 	 */
338 	auto write(ElementType!Data[] data)
339 	{
340 		hsize_t[rank] offset;
341 		offset[] = 0;
342 		write(data, offset[]);
343 	}
344 
345 	/*
346 	 * Write data to the dataset;
347 	 */
348 	auto write(ElementType!Data[] data, const(hsize_t)[] offset)
349 	{
350 		import std.exception : enforce;
351 		enforce (data.length+offset[0] <= currShape[0], "Bounds checking failed!");
352 
353 		import hdf5.hdf5 : herr_t, H5Sselect_hyperslab, H5Screate_simple, H5Dwrite,
354 			H5S_seloper_t, H5P_DEFAULT, H5Sclose;
355 
356 		assert(offset.length == rank);
357 		hsize_t[rank] count = [data.length];
358 		assert(  count.length == rank);
359 		herr_t status;
360 
361 		auto filespace = Dataspace (_dataset);
362 		status = H5Sselect_hyperslab (filespace, H5S_seloper_t.H5S_SELECT_SET, offset.ptr, null, count.ptr, null);
363 		assert(status >= 0);
364 
365 		auto memspace = H5Screate_simple (rank, count.ptr, null);
366 		scope(exit) H5Sclose (memspace);
367 
368 		status = H5Dwrite (_dataset, _data_spec.tid, memspace, filespace, H5P_DEFAULT, data.ptr);
369 		assert(status >= 0);
370 	}
371 
372 	auto remove(hsize_t idx)
373 	{
374 		auto l = currShape[0];
375 		if (idx != l-1)
376 		{
377 			auto buffer = read([idx+1], [currShape[0]-idx-1]);
378 			write(buffer, [idx]);
379 		}
380 		currShape = [l - 1];
381 	}
382 
	/*
	 * Placeholder overload for removing several elements at once.
	 *
	 * NOTE(review): the body is empty, so a call leaves the dataset untouched
	 * and `index_range` is never read. Presumably the argument is meant to
	 * be a range of element indices — TODO confirm and implement.
	 */
	auto remove(IndexRange)(IndexRange index_range)
	{

	}
387 
388 	Range opSlice()
389 	{
390 		return Range(this, 0, currShape[0]);
391 	}
392 
393 	auto add(ElementType!Data element)
394 	{
395 		// set new shape
396 		auto last_index = currShape[0];
397 		currShape = [last_index+1];
398 		write([element], [last_index]);
399 	}
400 
401 	auto add(Range)(Range range)
402 		if (is(ElementType!Range : ElementType!Data) && hasLength!Range)
403 	{
404 		// set new shape
405 		auto last_index = currShape[0];
406 		currShape = [last_index+range.length];
407 		write(range, [last_index]);
408 	}
409 
410 	struct Range
411 	{
412 		// index of starting element + element count of the range
413 		private
414 		{
415 			size_t _start, _length;
416 			Dataset* _dataset;
417 		}
418 
419 		@disable
420 		this();
421 
422 		this(ref Dataset dataset, size_t start, size_t length)
423 		{
424 			_dataset = &dataset;
425 			_start = start;
426 			_length = length;
427 		}
428 
429 		bool empty() const
430 		{
431 			return _length == 0;
432 		}
433 
434 		ref ElementType!Data front() const
435 		{
436 			return (*_dataset)[_start];
437 		}
438 
439 		ref ElementType!Data back() const
440 		{
441 			return (*_dataset)[_start+_length-1];
442 		}
443 
444 		void popFront()
445 		{
446 			import std.exception : enforce;
447 
448 			enforce (!empty);
449 
450 			_start++;
451 			_length--;
452 		}
453 
454 		void popBack()
455 		{
456 			import std.exception : enforce;
457 
458 			enforce (!empty);
459 
460 			_length--;
461 		}
462 
463 		auto save() const
464 		{
465 			return this;
466 		}
467 
468 		ref auto opIndex(size_t index) const
469 		{
470 			return (*_dataset)[index];
471 		}
472 
473 		size_t length() const
474 		{
475 			return _length;
476 		}
477 	}
478 
479 	ref ElementType!Data opIndex(size_t index) const
480 	{
481 		if (index >= currShape[0])
482 			throw new Exception("Bounds");
483 
484 		return read([index], [1])[0];
485 	}
486 
487 	auto opIndexAssign(ElementType!Data element, size_t index)
488 	{
489 		write([element], [index]);
490 	}
491 
492 	auto opOpAssign(string op)(ElementType!Data rhs)
493 	{
494 		static if (op == "~")
495 		{
496 			add(rhs);
497 		}
498 	}
499 
500 	auto opOpAssign(string op, Range)(Range r)
501 	{
502 		static if (op == "~")
503 		{
504 			add(r);
505 		}
506 	}
507 
508 	auto tid()
509 	{
510 		return _dataset;
511 	}
512 
513 private:
514 	hid_t _dataset = -1;
515 	DataSpecType _data_spec;
516 	// Current shape of dataset
517 	hsize_t[rank] _curr_shape;
518 	// Maximal shape of dataset
519 	hsize_t[rank] _max_shape;
520 }