diff --git a/src/main/java/ru/spbau/mit/StreamSerializable.java b/src/main/java/ru/spbau/mit/StreamSerializable.java
index 209e56b..0ebddfc 100644
--- a/src/main/java/ru/spbau/mit/StreamSerializable.java
+++ b/src/main/java/ru/spbau/mit/StreamSerializable.java
@@ -7,11 +7,11 @@ public interface StreamSerializable {
     /**
      * @throws SerializationException in case of IOException during serialization
      */
-    void serialize(OutputStream out);
+    void serialize(OutputStream out) throws SerializationException;
 
     /**
      * Replace current state with data from input stream containing serialized data
      * @throws SerializationException in case of IOException during deserialization
      */
-    void deserialize(InputStream in);
+    void deserialize(InputStream in) throws SerializationException;
 }
diff --git a/src/main/java/ru/spbau/mit/StringSetImpl.java b/src/main/java/ru/spbau/mit/StringSetImpl.java
new file mode 100644
index 0000000..5262873
--- /dev/null
+++ b/src/main/java/ru/spbau/mit/StringSetImpl.java
@@ -0,0 +1,321 @@
+package ru.spbau.mit;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/**
+ * Set of strings over the ASCII letters {@code a-z} and {@code A-Z}, stored as a trie.
+ *
+ * <p>Every vertex counts the stored strings that pass through it, so {@link #size()} and
+ * {@link #howManyStartsWithPrefix(String)} are answered without visiting the whole subtree.
+ */
+public class StringSetImpl implements StringSet, StreamSerializable {
+    private static final int BYTES_IN_INT = 4;
+    private static final int BITS_IN_BYTE = 8;
+    private static final int BYTE_MASK = 0xFF;
+
+    // null while the set is empty; created by the first successful add().
+    private Vertex root;
+
+    public StringSetImpl() {
+        root = null;
+    }
+
+    /**
+     * Adds {@code element} to the set.
+     *
+     * @return {@code true} if the element was not in the set before
+     * @throws IllegalArgumentException if the element contains a character outside a-z and A-Z
+     */
+    public boolean add(String element) {
+        // Validate up front so a rejected string cannot leave empty branches in the trie.
+        requireSupportedChars(element);
+
+        Vertex current = traverseWord(element, true);
+        if (current.isTerminal) {
+            return false;
+        }
+
+        current.isTerminal = true;
+        for (Vertex v = current; v != null; v = v.parent) {
+            v.subTreeSize++;
+        }
+        return true;
+    }
+
+    /**
+     * @return {@code true} if {@code element} is in the set; strings with unsupported
+     * characters are never in the set
+     */
+    public boolean contains(String element) {
+        Vertex current = traverseWord(element, false);
+        return current != null && current.isTerminal;
+    }
+
+    /**
+     * Removes {@code element} from the set.
+     *
+     * @return {@code true} if the element was in the set
+     */
+    public boolean remove(String element) {
+        Vertex current = traverseWord(element, false);
+        if (current == null || !current.isTerminal) {
+            return false;
+        }
+
+        current.isTerminal = false;
+        // Walk back to the root, fixing the counters and unlinking branches that became empty.
+        for (int i = element.length() - 1; i >= 0; --i) {
+            removeOnEmpty(current, element.charAt(i));
+            current = current.parent;
+        }
+
+        // The loop stops below the root, so the root counter is updated separately.
+        root.subTreeSize--;
+        if (root.subTreeSize == 0) {
+            root = null;
+        }
+        return true;
+    }
+
+    /** @return the number of strings in the set */
+    public int size() {
+        return root == null ? 0 : root.subTreeSize;
+    }
+
+    /** @return the number of stored strings that start with {@code prefix} */
+    public int howManyStartsWithPrefix(String prefix) {
+        Vertex current = traverseWord(prefix, false);
+        return current == null ? 0 : current.subTreeSize;
+    }
+
+    /**
+     * Writes the whole trie to {@code out}.
+     *
+     * @throws SerializationException if the stream reports an {@link IOException}
+     */
+    @Override
+    public void serialize(OutputStream out) throws SerializationException {
+        Vertex.serializeVertex(root, out);
+    }
+
+    /**
+     * Replaces the content of this set with the trie read from {@code in}.
+     * The current content is kept if reading fails.
+     *
+     * @throws SerializationException if the stream fails, ends early or holds malformed data
+     */
+    @Override
+    public void deserialize(InputStream in) throws SerializationException {
+        root = Vertex.deserializeVertex(in, null);
+    }
+
+    /**
+     * Follows {@code element} from the root.
+     *
+     * @param addIfNotExists create missing vertices (and the root) instead of giving up
+     * @return the vertex for {@code element}, or {@code null} if a lookup left the trie
+     */
+    private Vertex traverseWord(String element, boolean addIfNotExists) {
+        if (root == null) {
+            if (!addIfNotExists) {
+                return null;
+            }
+            root = new Vertex(null);
+        }
+
+        Vertex current = root;
+        for (int i = 0; i < element.length(); ++i) {
+            char c = element.charAt(i);
+            int stepCharIndex = Vertex.stepCharIndex(c);
+            if (stepCharIndex == Vertex.NO_INDEX) {
+                if (addIfNotExists) {
+                    throw new IllegalArgumentException(unsupportedCharMessage(c));
+                }
+                // Such a string can never have been added.
+                return null;
+            }
+            if (current.next[stepCharIndex] == null) {
+                if (!addIfNotExists) {
+                    return null;
+                }
+                current.next[stepCharIndex] = new Vertex(current);
+            }
+            current = current.next[stepCharIndex];
+        }
+        return current;
+    }
+
+    /** Decrements the counter of {@code current} and unlinks it from its parent once empty. */
+    private void removeOnEmpty(Vertex current, char stepChar) {
+        current.subTreeSize--;
+        if (current.subTreeSize == 0 && current.parent != null) {
+            current.parent.next[Vertex.stepCharIndex(stepChar)] = null;
+        }
+    }
+
+    private static void requireSupportedChars(String element) {
+        for (int i = 0; i < element.length(); ++i) {
+            char c = element.charAt(i);
+            if (Vertex.stepCharIndex(c) == Vertex.NO_INDEX) {
+                throw new IllegalArgumentException(unsupportedCharMessage(c));
+            }
+        }
+    }
+
+    private static String unsupportedCharMessage(char c) {
+        return "Only the letters a-z and A-Z are supported, got char code " + (int) c;
+    }
+
+    // NOTE(review): the IOException cause is dropped below because only the no-argument
+    // constructor of SerializationException is visible from this file - confirm and chain it.
+    private static void booleanSerialize(boolean b, OutputStream out) throws SerializationException {
+        try {
+            out.write(b ? 1 : 0);
+        } catch (IOException e) {
+            throw new SerializationException();
+        }
+    }
+
+    /** Writes {@code num} as four bytes, least significant byte first. */
+    private static void intSerialize(int num, OutputStream out) throws SerializationException {
+        try {
+            for (int i = 0; i < BYTES_IN_INT; ++i) {
+                out.write((num >>> (i * BITS_IN_BYTE)) & BYTE_MASK);
+            }
+        } catch (IOException e) {
+            throw new SerializationException();
+        }
+    }
+
+    private static boolean booleanDeserialize(InputStream in) throws SerializationException {
+        try {
+            int i = in.read();
+            // Also rejects -1, i.e. the end of the stream.
+            if (i != 1 && i != 0) {
+                throw new SerializationException();
+            }
+            return i == 1;
+        } catch (IOException e) {
+            throw new SerializationException();
+        }
+    }
+
+    /** Reads four bytes, least significant byte first; fails if the stream ends early. */
+    private static int intDeserialize(InputStream in) throws SerializationException {
+        try {
+            int num = 0;
+            for (int i = 0; i < BYTES_IN_INT; ++i) {
+                final int readNum = in.read();
+                if (readNum < 0) {
+                    // read() returns -1 at the end of the stream; OR-ing it in would corrupt num.
+                    throw new SerializationException();
+                }
+                num |= readNum << (i * BITS_IN_BYTE);
+            }
+            return num;
+        } catch (IOException e) {
+            throw new SerializationException();
+        }
+    }
+
+    /** Trie vertex; serialized as magic, all children in index order, terminal flag, counter. */
+    private static class Vertex implements StreamSerializable {
+        private static final int LETTERS_IN_ALPHABET = 26;
+        private static final int CHAR_POWER = 2 * LETTERS_IN_ALPHABET;
+        private static final int NO_INDEX = -1;
+        private static final int VERTEX_MAGIC = 0xAABBCCDD;
+        private static final int EMPTY_VERTEX_MAGIC = 0xDDCCBBAA;
+
+        private final Vertex[] next;
+        private final Vertex parent;
+        private boolean isTerminal;
+        // Number of stored strings ending in this vertex or below it.
+        private int subTreeSize;
+
+        Vertex(Vertex parent) {
+            isTerminal = false;
+            next = new Vertex[CHAR_POWER];
+            this.parent = parent;
+            subTreeSize = 0;
+        }
+
+        /** Writes {@code v}, or the empty marker when {@code v} is {@code null}. */
+        public static void serializeVertex(Vertex v, OutputStream out) throws SerializationException {
+            if (v == null) {
+                intSerialize(EMPTY_VERTEX_MAGIC, out);
+            } else {
+                v.serialize(out);
+            }
+        }
+
+        /**
+         * Reads one vertex slot.
+         *
+         * @return the vertex attached to {@code parent}, or {@code null} for the empty marker
+         * @throws SerializationException if the slot starts with an unknown magic number
+         */
+        public static Vertex deserializeVertex(InputStream in, Vertex parent) throws SerializationException {
+            final int magic = intDeserialize(in);
+            if (magic == EMPTY_VERTEX_MAGIC) {
+                return null;
+            } else if (magic == VERTEX_MAGIC) {
+                Vertex v = new Vertex(parent);
+                v.deserialize(in);
+                return v;
+            } else {
+                throw new SerializationException();
+            }
+        }
+
+        /** @return the child slot for {@code stepChar}, or {@code NO_INDEX} if it is not a-z / A-Z */
+        public static int stepCharIndex(char stepChar) {
+            if (stepChar >= 'a' && stepChar <= 'z') {
+                return stepChar - 'a';
+            }
+            if (stepChar >= 'A' && stepChar <= 'Z') {
+                return LETTERS_IN_ALPHABET + (stepChar - 'A');
+            }
+            return NO_INDEX;
+        }
+
+        @Override
+        public void serialize(OutputStream out) throws SerializationException {
+            intSerialize(VERTEX_MAGIC, out);
+            for (Vertex child : next) {
+                serializeVertex(child, out);
+            }
+            booleanSerialize(isTerminal, out);
+            intSerialize(subTreeSize, out);
+        }
+
+        /**
+         * Fills this vertex from {@code in}; the leading magic number is already consumed.
+         *
+         * @throws SerializationException if the data is malformed or the stored counter does
+         * not match the terminal flags actually read
+         */
+        @Override
+        public void deserialize(InputStream in) throws SerializationException {
+            int expectedSize = 0;
+            for (int i = 0; i < next.length; ++i) {
+                next[i] = deserializeVertex(in, this);
+                if (next[i] != null) {
+                    expectedSize += next[i].subTreeSize;
+                }
+            }
+
+            isTerminal = booleanDeserialize(in);
+            if (isTerminal) {
+                expectedSize++;
+            }
+
+            subTreeSize = intDeserialize(in);
+            // size() and howManyStartsWithPrefix() trust this counter, so reject a wrong one.
+            if (subTreeSize != expectedSize) {
+                throw new SerializationException();
+            }
+        }
+    }
+}
diff --git a/src/test/java/ru/spbau/mit/StreamSerializableTest.java b/src/test/java/ru/spbau/mit/StreamSerializableTest.java
new file mode 100644
index 0000000..6b90d71
--- /dev/null
+++ b/src/test/java/ru/spbau/mit/StreamSerializableTest.java
@@ -0,0 +1,90 @@
+package ru.spbau.mit;
+
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import static org.junit.Assert.*;
+
+public class StreamSerializableTest {
+    @Test(expected = SerializationException.class)
+    public void testFailEmpty() throws SerializationException {
+        testDeserialization(new ByteArrayOutputStream());
+    }
+
+    @Test(expected = SerializationException.class)
+    public void testFailDummyHeader() throws SerializationException {
+        final int dummyHeader = 0xBABEFAFA;
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        writeInteger(dummyHeader, out);
+        testDeserialization(out);
+    }
+
+    @Test(expected = SerializationException.class)
+    public void testFailTruncated() throws SerializationException {
+        StringSetImpl s = new StringSetImpl();
+        assertTrue(s.add("a"));
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        s.serialize(out);
+
+        // Cut the stream inside the last integer, the counter of the root vertex.
+        byte[] bytes = out.toByteArray();
+        ByteArrayInputStream in = new ByteArrayInputStream(bytes, 0, bytes.length - 1);
+        new StringSetImpl().deserialize(in);
+    }
+
+    @Test
+    public void testEmpty() throws SerializationException {
+        StringSetImpl s = new StringSetImpl();
+        assertEmptyStringSetImpl(s); // sanity check before serializing
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        s.serialize(out);
+        StringSetImpl s1 = testDeserialization(out);
+        assertEmptyStringSetImpl(s1);
+
+        String[] arr = {"abc", "cde", ""};
+        for (String str : arr) {
+            assertTrue(s1.add(str));
+        }
+
+        for (String str : arr) {
+            assertTrue(s1.remove(str));
+        }
+
+        out.reset();
+        s1.serialize(out);
+
+        StringSetImpl s2 = testDeserialization(out);
+        assertEmptyStringSetImpl(s2);
+    }
+
+    private static void writeInteger(int num, OutputStream out) {
+        final int bytesInInteger = 4;
+        final int bitsInByte = 8;
+        try {
+            for (int i = 0; i < bytesInInteger; ++i) {
+                final int lowestByte = num & ((1 << bitsInByte) - 1);
+                num >>= bitsInByte;
+                out.write(lowestByte);
+            }
+        } catch (IOException e) {
+            fail();
+        }
+    }
+
+    private static StringSetImpl testDeserialization(ByteArrayOutputStream out) throws SerializationException {
+        StringSetImpl stringSet = new StringSetImpl();
+        ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+        ((StreamSerializable) stringSet).deserialize(in);
+        return stringSet;
+    }
+
+    private static void assertEmptyStringSetImpl(StringSetImpl s) {
+        assertNotNull(s);
+        assertEquals(0, s.size());
+        assertEquals(0, s.howManyStartsWithPrefix(""));
+    }
+}
diff --git a/src/test/java/ru/spbau/mit/StringSetImplTest.java b/src/test/java/ru/spbau/mit/StringSetImplTest.java
new file mode 100644
index 0000000..188d7a0
--- /dev/null
+++ b/src/test/java/ru/spbau/mit/StringSetImplTest.java
@@ -0,0 +1,227 @@
+package ru.spbau.mit;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
+import java.util.Random;
+
+import static org.junit.Assert.*;
+
+public class StringSetImplTest {
+    private static final int MAX_STRING_SIZE = 100;
+    private static final int TESTS_COUNT = 10000;
+    private static final Random RANDOMIZER;
+    private static final String RANDOM_SYMBOLS;
+    // Every assertion runs against all sets collected here: the initial one plus the
+    // serialized-and-restored copies made before each add/remove.
+    private ArrayList<StringSetImpl> stringSets;
+    private boolean testSerializationBetweenStates;
+
+    static {
+        final int randomizerSeed = 1123834521;
+        RANDOMIZER = new Random(randomizerSeed);
+
+        StringBuilder sb = new StringBuilder();
+        for (char c = 'a'; c <= 'z'; ++c) {
+            sb.append(c);
+        }
+
+        for (char c = 'A'; c <= 'Z'; ++c) {
+            sb.append(c);
+        }
+
+        RANDOM_SYMBOLS = sb.toString();
+    }
+
+    @Before
+    public void setUp() throws Exception {
+        stringSets = new ArrayList<>();
+        stringSets.add(new StringSetImpl());
+        testSerializationBetweenStates = true;
+    }
+
+    @Test
+    public void testSimple() throws SerializationException {
+        assertAdd(true, "abc");
+        assertContains(true, "abc");
+        assertSize(1);
+        assertHowManyStartsWithPrefix("abc", 1);
+    }
+
+    @Test
+    public void myTest() throws SerializationException {
+        ArrayList<String> alreadyAdded = new ArrayList<>();
+        testSerializationBetweenStates = false;
+        for (int i = 0; i < TESTS_COUNT; i++) {
+            String newString = randomString();
+            assertSize(alreadyAdded.size());
+            if (alreadyAdded.contains(newString)) {
+                assertAdd(false, newString);
+                assertSize(alreadyAdded.size());
+            } else {
+                assertAdd(true, newString);
+                alreadyAdded.add(newString);
+            }
+        }
+
+        for (String s : alreadyAdded) {
+            assertAdd(false, s);
+            assertContains(true, s);
+        }
+
+        for (int i = 0; i < TESTS_COUNT; i++) {
+            String s = randomString();
+            assertContains(alreadyAdded.contains(s), s);
+        }
+        testSerializationBetweenStates = true;
+    }
+
+    @Test
+    public void stringSetSpecificCases() throws SerializationException {
+        String newString = randomString();
+        assertSize(0);
+        assertAdd(true, newString);
+        assertSize(1);
+        assertAdd(false, newString);
+        assertSize(1);
+        String prefix = newString.substring(0, newString.length() - 1);
+        assertAdd(true, prefix);
+        assertRemove(true, newString);
+        assertContains(true, prefix);
+        assertContains(false, newString);
+    }
+
+    @Test
+    public void typicalCase1() throws SerializationException {
+        String newString = randomString();
+        assertAdd(true, newString);
+        assertContains(true, newString);
+        assertSize(1);
+        assertAdd(false, newString);
+        assertSize(1);
+        String suffix = newString.substring(1);
+        String prefix = newString.substring(0, newString.length() - 1);
+        assertContains(false, suffix);
+        assertContains(false, prefix);
+        assertSize(1);
+        assertRemove(false, suffix);
+        assertRemove(false, prefix);
+        assertRemove(true, newString);
+        assertSize(0);
+        assertContains(false, suffix);
+        assertContains(false, prefix);
+        assertContains(false, newString);
+        assertRemove(false, newString);
+        assertContains(false, suffix);
+        assertContains(false, prefix);
+        assertContains(false, newString);
+    }
+
+    @Test
+    public void typicalCase2() throws SerializationException {
+        String newString = randomString();
+        assertAdd(true, newString);
+        assertAdd(false, newString);
+
+        String suffix = newString.substring(1);
+        String prefix = newString.substring(0, newString.length() - 1);
+
+        final int prefixTestCount = 100;
+        for (int i = 0; i < prefixTestCount; ++i) {
+            // "+ 1" keeps the bound positive for one-letter strings, whose prefix is empty.
+            String checkPrefix = prefix.substring(0, RANDOMIZER.nextInt(prefix.length() + 1));
+            assertHowManyStartsWithPrefix(checkPrefix, 1);
+        }
+
+        assertSize(1);
+        assertRemove(false, suffix);
+        assertRemove(false, prefix);
+        assertRemove(true, newString);
+        assertSize(0);
+    }
+
+    @Test
+    public void testUnsupportedCharacters() throws SerializationException {
+        assertAdd(true, "j");
+        // '0' must not be folded onto the slot of another letter.
+        assertContains(false, "0");
+        assertRemove(false, "0");
+        assertHowManyStartsWithPrefix("0", 0);
+
+        for (StringSetImpl stringSet : stringSets) {
+            try {
+                stringSet.add("a1");
+                fail();
+            } catch (IllegalArgumentException expected) {
+                // the rejected string must not change the set
+            }
+        }
+        assertSize(1);
+        assertHowManyStartsWithPrefix("a", 0);
+    }
+
+    private static String randomString() {
+        int size = 1 + RANDOMIZER.nextInt(MAX_STRING_SIZE);
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < size; i++) {
+            int nextCharPosition = RANDOMIZER.nextInt(RANDOM_SYMBOLS.length());
+            char nextSymbol = RANDOM_SYMBOLS.charAt(nextCharPosition);
+            sb.append(nextSymbol);
+        }
+        return sb.toString();
+    }
+
+    private static StringSetImpl testSerialization(StringSetImpl olds) throws SerializationException {
+        StringSetImpl s = new StringSetImpl();
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        olds.serialize(out);
+        ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+        s.deserialize(in);
+        return s;
+    }
+
+    private StringSetImpl lastStringSet() {
+        int size = stringSets.size();
+        assert (size > 0);
+        return stringSets.get(size - 1);
+    }
+
+    private void assertAdd(boolean answer, String s) throws SerializationException {
+        if (testSerializationBetweenStates) {
+            stringSets.add(testSerialization(lastStringSet()));
+        }
+        for (StringSetImpl stringSet : stringSets) {
+            assertEquals(answer, stringSet.add(s));
+        }
+    }
+
+    private void assertContains(boolean answer, String s) {
+        for (StringSetImpl stringSet : stringSets) {
+            assertEquals(answer, stringSet.contains(s));
+        }
+    }
+
+    private void assertRemove(boolean answer, String s) throws SerializationException {
+        if (testSerializationBetweenStates) {
+            stringSets.add(testSerialization(lastStringSet()));
+        }
+        for (StringSetImpl stringSet : stringSets) {
+            assertEquals(answer, stringSet.remove(s));
+        }
+    }
+
+    private void assertSize(int answer) {
+        for (StringSetImpl stringSet : stringSets) {
+            assertEquals(answer, stringSet.size());
+        }
+    }
+
+    private void assertHowManyStartsWithPrefix(String prefix, int answer) {
+        for (StringSetImpl stringSet : stringSets) {
+            assertEquals(answer, stringSet.howManyStartsWithPrefix(prefix));
+        }
+    }
+}
diff --git a/src/test/java/ru/spbau/mit/StringSetTest.java b/src/test/java/ru/spbau/mit/StringSetTest.java
index 1d3bcdf..1607397 100644
--- a/src/test/java/ru/spbau/mit/StringSetTest.java
+++ b/src/test/java/ru/spbau/mit/StringSetTest.java
@@ -21,7 +21,7 @@ public void testSimple() {
     }
 
     @Test
-    public void testSimpleSerialization() {
+    public void testSimpleSerialization() throws SerializationException {
         StringSet stringSet = instance();
 
         assertTrue(stringSet.add("abc"));