001    /*
002     * Copyright (c) 1998-2014 ChemAxon Ltd. All Rights Reserved.
003     *
004     * This software is the confidential and proprietary information of
005     * ChemAxon. You shall not disclose such Confidential Information
006     * and shall use it only in accordance with the terms of the agreements
007     * you entered into with ChemAxon.
008     *
009     */
010    package com.chemaxon.overlap.io;
011    
012    import com.chemaxon.calculations.common.SubProgressObserver;
013    import com.google.common.base.Optional;
014    import java.io.IOException;
015    import java.io.ObjectInputStream;
016    import java.io.ObjectOutputStream;
017    import java.io.Serializable;
018    import java.io.UnsupportedEncodingException;
019    import java.util.ArrayList;
020    import java.util.List;
021    
022    /**
023     * Concatenated UTF-8 byte array backed master String storage.
024     *
025     * <p>
026     * Note that this implementation treats empty Strings as absent.</p>
027     *
028     * @author Gabor Imre
029     */
030    public class CompactStringStorage implements MasterStorage<String>, Serializable {
031    
032        /**
033         * Serial version.
034         */
035        private static final long serialVersionUID = 0;
036    
037        /**
038         * Start marker.
039         *
040         * <p>
041         * Used for consistency check; in the future might be used for versioning.</p>
042         */
043        private static final long MAGIC_START = 0xca0ef9ea028a84ffL;
044    
045        /**
046         * End marker.
047         *
048         * <p>
049         * Used only for consistency checks.</p>
050         */
051        private static final long MAGIC_END = 0x821c606b2b04deeeL;
052    
053        /**
054         * Number of master entities stored on one page.
055         *
056         * <p>
057         * All but the last page is expected to contain the given entity count.</p>
058         */
059        private final int pagesize;
060    
061        /**
062         * Total number of absent master entities, including skippeds.
063         */
064        private final int absentCount;
065    
066        /**
067         * Total number of covered master entities, including absents (and skippeds).
068         */
069        private final int size;
070    
071        /**
072         * Number of initial skips.
073         *
074         * <p>
075         * The given number of master entities are all absent, they are not stored.</p>
076         */
077        private final int skips;
078    
079        /**
080         * Index of the first byte of the <code>i * pagesize + j</code>-th non-skipped structure.
081         */
082        private final int[][] starts;
083    
084        /**
085         * Storage array.
086         */
087        private final byte[][] storage;
088    
089        /**
090         * Constructor used by custom deserialization.
091         *
092         * @param pagesize Pagesize
093         * @param absentcount Absent count
094         * @param size Size
095         * @param skips Skips
096         * @param starts Starts
097         * @param storage Storage
098         */
099        private CompactStringStorage(int pagesize, int absentcount, int size, int skips, int[][] starts, byte[][] storage) {
100            this.pagesize = pagesize;
101            this.absentCount = absentcount;
102            this.size = size;
103            this.skips = skips;
104            this.starts = starts;
105            this.storage = storage;
106        }
107    
108        /**
109         * Construct.
110         *
111         * @param pagesize Page size
112         * @param skips Number of initial skips
113         * @param starts Start positions
114         * @param pages Data pages
115         */
116        CompactStringStorage(int pagesize, int skips, List<int[]> starts, List<byte[]> pages) {
117            if (pagesize < 1) {
118                throw new IllegalArgumentException("Invalid pagesize " + pagesize);
119            }
120            if (skips < 0) {
121                throw new IllegalArgumentException("Invalid skips " + skips);
122            }
123    
124            this.pagesize = pagesize;
125            this.skips = skips;
126    
127            // consistency check
128            if (pages.isEmpty() && skips == 0) {
129                throw new IllegalArgumentException("Empty storage");
130            }
131            if (pages.size() != starts.size()) {
132                throw new IllegalArgumentException("Mismatch in data structures. Pages: " + pages.size() + " starts: "
133                        + starts.size());
134            }
135    
136            int s = skips;
137            int a = skips;
138    
139            this.starts = new int[pages.size()][];
140            this.storage = new byte[pages.size()][];
141    
142            // Run through all pages
143            for (int i = 0; i < this.starts.length; i++) {
144    
145                this.starts[i] = starts.get(i);
146                this.storage[i] = pages.get(i);
147                if (this.starts[i].length != pagesize && i < this.starts.length - 1) {
148                    throw new IllegalArgumentException("Page " + i + " of " + pages.size() + " size mismatch. Got "
149                            + this.starts[i].length + " expected " + pagesize);
150                }
151                s += this.starts[i].length;
152                for (int j = 0; j < this.starts[i].length; j++) {
153                    // Length of the current entity
154                    final int len;
155                    if (j == this.starts[i].length - 1) {
156                        // this is the last entity
157                        len = this.storage[i].length - this.starts[i][j];
158                    } else {
159                        len = this.starts[i][j + 1] - this.starts[i][j];
160                    }
161    
162                    //final int ns = (j == this.starts[i].length - 1) ? this.storage[i].length : this.starts[i][j + 1];
163                    if (len == 0) {
164                        a++;
165                    }
166                }
167            }
168    
169            this.size = s;
170            this.absentCount = a;
171    
172        }
173    
174        @Override
175        public String toString() {
176            int bytes = 0;
177            for (byte[] b : this.storage) {
178                bytes += b.length;
179            }
180    
181            return "Compact string storage; size: " + this.size + " absents: " + this.absentCount + " skips: " + this.skips
182                    + " pagesize: " + this.pagesize + " bytes: " + bytes;
183        }
184    
185        @Override
186        public int size() {
187            return this.size;
188        }
189    
190        @Override
191        public int getAbsentCount() {
192            return this.absentCount;
193        }
194    
195        @Override
196        public Optional<String> get(int index) {
197            if (index < 0) {
198                throw new IndexOutOfBoundsException("Negative index " + index);
199            }
200            if (index >= this.size) {
201                throw new IndexOutOfBoundsException("Invalid index " + index + "; size is  " + this.size);
202            }
203            if (index < this.skips) {
204                return Optional.<String>absent();
205            }
206    
207            // index of the storing page
208            final int page = (index - this.skips) / this.pagesize;
209    
210            // position within the storing page
211            final int pos = (index - this.skips) - page * this.pagesize;
212    
213            // start index of the
214            final int start = this.starts[page][pos];
215    
216            // length of the associated byte array
217            final int len;
218    
219            if (pos == this.starts[page].length - 1) {
220                // this is the last entity
221                len = this.storage[page].length - start;
222            } else {
223                len = this.starts[page][pos + 1] - start;
224            }
225    
226            if (len == 0) {
227                return Optional.<String>absent();
228            }
229            try {
230                // compose String
231                return Optional.of(new String(this.storage[page], start, len, "UTF-8"));
232            } catch (UnsupportedEncodingException ex) {
233                throw new AssertionError(ex);
234            }
235        }
236    
237        /**
238         * Write into serialized format.
239         *
240         * @param oos Stream to write to
241         * @param po Progress observer to track progress. Work units reflect non-skipped stored entities. Will be closed
242         * upon completion.
243         *
244         * @throws IOException re-thrown
245         */
246        public void serialize(
247                ObjectOutputStream oos,
248                SubProgressObserver po) throws IOException {
249    
250            try {
251                po.switchToDeterminate(this.size - this.skips);
252                oos.writeLong(MAGIC_START); // Start marker
253                oos.writeInt(this.size); // Size
254                oos.writeInt(this.skips); // Skips
255                oos.writeInt(this.absentCount); // Absents
256                oos.writeInt(this.pagesize); // Page size
257                oos.writeInt(this.starts.length); // Page count
258                for (int i = 0; i < this.starts.length; i++) {
259                    oos.writeUnshared(this.starts[i]);
260                    oos.writeUnshared(this.storage[i]);
261                    oos.reset();
262                    po.worked(this.starts[i].length);
263                }
264    
265                oos.writeLong(MAGIC_END);
266            } finally {
267                po.done();
268            }
269        }
270    
271        /**
272         * Deserialize from an object input stream.
273         *
274         * @param ois Stream to read from
275         * @param po Progress observer to track progress. Work units reflect non-skipped stored entities. Will be closed
276         * upon completion.
277         * @return deserialized instance
278         */
279        public static CompactStringStorage deserialize(ObjectInputStream ois, SubProgressObserver po)
280                throws IOException, ClassNotFoundException {
281    
282            try {
283    
284                final long start = ois.readLong();
285                if (start != MAGIC_START) {
286                    throw new IllegalArgumentException("Start marker " + MAGIC_START + " expected, got " + start);
287                }
288    
289                final int size = ois.readInt();
290                if (size <= 0) {
291                    throw new IllegalArgumentException("Invalid size " + size);
292                }
293    
294                final int skips = ois.readInt();
295                if (skips < 0) {
296                    throw new IllegalArgumentException("Invalid skips " + skips);
297                }
298                if (skips > size) {
299                    throw new IllegalArgumentException("Invalid skips " + skips + " for size " + size);
300                }
301    
302                final int absents = ois.readInt();
303                final int pagesize = ois.readInt();
304                final int pagecount = ois.readInt();
305    
306                po.switchToDeterminate(size - skips);
307    
308                final int[][] starts = new int[pagecount][];
309                final byte[][] storage = new byte[pagecount][];
310    
311                for (int i = 0; i < starts.length; i++) {
312                    starts[i] = (int[]) ois.readUnshared();
313                    storage[i] = (byte[]) ois.readUnshared();
314                    po.worked(starts[i].length);
315                }
316    
317                final long end = ois.readLong();
318                if (end != MAGIC_END) {
319                    throw new IllegalArgumentException("End marker " + MAGIC_END + " expected, got " + end);
320                }
321    
322                return new CompactStringStorage(pagesize, absents, size, skips, starts, storage);
323            } finally {
324                po.done();
325            }
326        }
327    
328    
329        /**
330         * Builder class.
331         */
332        public static class Builder implements MasterStorages.MasterStorageBuilder<String> {
333    
334            /**
335             * Pagesize in terms of master entity count.
336             */
337            private final int pagesize;
338    
339            /**
340             * Initial skip count.
341             */
342            private final int skips;
343    
344            /**
345             * Next expected master index.
346             */
347            private int nextExpectedMasterIndex;
348    
349            /**
350             * Buffer under construction.
351             */
352            private final ArrayList<byte[]> buffer = new ArrayList<byte[]>();
353    
354            /**
355             * Data pages.
356             */
357            private final ArrayList<byte[]> dataPages = new ArrayList<byte[]>();
358    
359            /**
360             * Start positions.
361             */
362            private final ArrayList<int[]> starts = new ArrayList<int[]>();
363    
364            /**
365             * Construct.
366             *
367             * @param pagesize Expected pagesize
368             * @param skips Skip count
369             */
370            public Builder(int pagesize, int skips) {
371                if (pagesize < 1) {
372                    throw new IllegalArgumentException("Invalid pagesize " + pagesize);
373                }
374                if (skips < 0) {
375                    throw new IllegalArgumentException("Invalid skips " + skips);
376                }
377                this.pagesize = pagesize;
378                this.skips = skips;
379                this.nextExpectedMasterIndex = skips;
380            }
381    
382            /**
383             * Check if we have a full page built.
384             *
385             * @param force Force collapse of partial page
386             */
387            private void checkPages(boolean force) {
388                if (this.buffer.isEmpty()) {
389                    return;
390                }
391                if (this.buffer.size() > this.pagesize) {
392                    throw new IllegalStateException("Buffer contains " + this.buffer.size()
393                            + " items, pagesize is " + this.pagesize);
394                }
395                if (force || this.buffer.size() == this.pagesize) {
396                    // collapse
397                    int bytes = 0;
398                    for (byte[] b : this.buffer) {
399                        bytes += b.length;
400                    }
401    
402                    final int[] startPositions = new int[this.buffer.size()];
403                    final byte[] dataPage = new byte[bytes];
404    
405                    int start = 0;
406                    for (int i = 0; i < this.buffer.size(); i++) {
407                        startPositions[i] = start;
408                        final byte[] data = this.buffer.get(i);
409                        System.arraycopy(data, 0, dataPage, start, data.length);
410                        start += data.length;
411                    }
412    
413                    this.buffer.clear();
414                    this.dataPages.add(dataPage);
415                    this.starts.add(startPositions);
416                }
417    
418            }
419    
420            @Override
421            public void add(int masterIndex, String data) {
422                if (masterIndex != this.nextExpectedMasterIndex) {
423                    throw new IllegalArgumentException("Expected master index " + this.nextExpectedMasterIndex
424                            + " got " + masterIndex);
425                }
426                if (data.isEmpty()) {
427                    addMissing(masterIndex);
428                } else {
429                    try {
430                        this.buffer.add(data.getBytes("UTF-8"));
431                    } catch (UnsupportedEncodingException ex) {
432                        throw new AssertionError(ex);
433                    }
434    
435                    checkPages(false);
436    
437                    this.nextExpectedMasterIndex++;
438                }
439            }
440    
441            @Override
442            public void addMissing(int masterIndex) {
443                if (masterIndex != this.nextExpectedMasterIndex) {
444                    throw new IllegalArgumentException("Expected master index " + this.nextExpectedMasterIndex
445                            + " got " + masterIndex);
446                }
447    
448                this.buffer.add(new byte[0]);
449                checkPages(false);
450    
451                this.nextExpectedMasterIndex++;
452            }
453    
454            @Override
455            public CompactStringStorage build() {
456                checkPages(true);
457                return new CompactStringStorage(this.pagesize, this.skips, this.starts, this.dataPages);
458            }
459    
460        }
461    
462    }