module wren.utils;

// We need buffers of a few different element types.
// There is no pre-processor here -- realistically this should be a mixin
// template, but a mixin template would still need string mixins to declare
// functions whose *names* depend on the buffer type.
// We also can't mark this @nogc, since std.format *does* use the GC, but this
// should *only* ever be run at compile time.
//
// Returns D source code declaring, for the given [name] and element [type]:
//   - struct <name>Buffer with `data`, `count` and `capacity` fields
//   - wren<name>BufferInit:  resets a buffer to the empty state
//   - wren<name>BufferClear: hands the storage back to wrenReallocate with a
//                            new size of 0, then re-initialises the buffer
//   - wren<name>BufferFill:  appends [count] copies of [data], growing the
//                            capacity to a power of two when needed
//   - wren<name>BufferWrite: appends a single element
//
// The result is meant to be fed to `mixin(...)`. The generated code calls
// wrenPowerOf2Ceil, so it must be mixed in where that function is visible.
package string DECLARE_BUFFER(string name, string type)
{
    if (!__ctfe) assert(0, "This function should never be run outside of CTFE.");

    import std.format : format;

    // %1$s is the buffer name, %2$s is the element type.
    return format!q"{
        struct %1$sBuffer {
            %2$s* data;
            int count;
            int capacity;
        }

        void wren%1$sBufferInit(%1$sBuffer* buffer) @nogc {
            buffer.data = null;
            buffer.capacity = 0;
            buffer.count = 0;
        }

        void wren%1$sBufferClear(VM)(VM* vm, %1$sBuffer* buffer) @nogc {
            import wren.vm : wrenReallocate;
            wrenReallocate(vm, buffer.data, 0, 0);
            wren%1$sBufferInit(buffer);
        }

        void wren%1$sBufferFill(VM)(VM* vm, %1$sBuffer* buffer, %2$s data, int count) @nogc {
            import wren.vm : wrenReallocate;
            if (buffer.capacity < buffer.count + count) {
                int capacity = wrenPowerOf2Ceil(buffer.count + count);
                buffer.data = cast(%2$s*)wrenReallocate(vm, buffer.data,
                    buffer.capacity * (%2$s).sizeof, capacity * (%2$s).sizeof);
                buffer.capacity = capacity;
            }

            for (int i = 0; i < count; i++) {
                buffer.data[buffer.count++] = data;
            }
        }

        void wren%1$sBufferWrite(VM)(VM* vm, %1$sBuffer* buffer, %2$s data) @nogc {
            wren%1$sBufferFill(vm, buffer, data, 1);
        }
    }"(name, type);
}

mixin(DECLARE_BUFFER("Byte", "ubyte"));
mixin(DECLARE_BUFFER("Int", "int"));

// Returns the number of bytes needed to encode [value] in UTF-8.
//
// Returns 0 if [value] is too large to encode (greater than 0x10ffff).
// [value] must not be negative; this is checked with an assert.
int wrenUtf8EncodeNumBytes(int value) @nogc
{
    assert(value >= 0, "Cannot encode a negative value.");

    if (value <= 0x7f) return 1;
    if (value <= 0x7ff) return 2;
    if (value <= 0xffff) return 3;
    if (value <= 0x10ffff) return 4;
    return 0;
}

// Encodes [value] as a series of bytes in [bytes], which is assumed to be
// large enough to hold the encoded result (up to 4 bytes).
//
// Returns the number of written bytes. A [value] above 0x10ffff cannot be
// encoded and hits the `assert(0)` at the end of the function.
int wrenUtf8Encode(int value, ubyte* bytes) @nogc
{
    if (value <= 0x7f)
    {
        // Single byte (i.e. fits in ASCII).
        *bytes = value & 0x7f;
        return 1;
    }
    else if (value <= 0x7ff)
    {
        // Two byte sequence: 110xxxxx 10xxxxxx.
        *bytes = 0xc0 | ((value & 0x7c0) >> 6);
        bytes++;
        *bytes = 0x80 | (value & 0x3f);
        return 2;
    }
    else if (value <= 0xffff)
    {
        // Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
        *bytes = 0xe0 | ((value & 0xf000) >> 12);
        bytes++;
        *bytes = 0x80 | ((value & 0xfc0) >> 6);
        bytes++;
        *bytes = 0x80 | (value & 0x3f);
        return 3;
    }
    else if (value <= 0x10ffff)
    {
        // Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
        *bytes = 0xf0 | ((value & 0x1c0000) >> 18);
        bytes++;
        *bytes = 0x80 | ((value & 0x3f000) >> 12);
        bytes++;
        *bytes = 0x80 | ((value & 0xfc0) >> 6);
        bytes++;
        *bytes = 0x80 | (value & 0x3f);
        return 4;
    }

    // Invalid Unicode value. See: http://tools.ietf.org/html/rfc3629
    assert(0, "Unreachable");
}

// Decodes the UTF-8 sequence starting at [bytes] (which has max [length]),
// returning the code point.
//
// Returns -1 if the bytes are not a valid UTF-8 sequence, if the sequence is
// truncated by [length], or if [length] is 0.
int wrenUtf8Decode(ubyte* bytes, uint length) @nogc
{
    // An empty buffer contains no sequence; bail out before reading *bytes.
    if (length == 0) return -1;

    // Single byte (i.e. fits in ASCII).
    if (*bytes <= 0x7f) return *bytes;

    int value;
    uint remainingBytes;
    if ((*bytes & 0xe0) == 0xc0)
    {
        // Two byte sequence: 110xxxxx 10xxxxxx.
        value = *bytes & 0x1f;
        remainingBytes = 1;
    }
    else if ((*bytes & 0xf0) == 0xe0)
    {
        // Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
        value = *bytes & 0x0f;
        remainingBytes = 2;
    }
    else if ((*bytes & 0xf8) == 0xf0)
    {
        // Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
        value = *bytes & 0x07;
        remainingBytes = 3;
    }
    else
    {
        // Invalid UTF-8 sequence.
        return -1;
    }

    // Don't read past the end of the buffer on truncated UTF-8. The check is
    // written without `length - 1` because [length] is unsigned and the
    // subtraction would wrap around for a length of 0.
    if (remainingBytes >= length) return -1;

    while (remainingBytes > 0)
    {
        bytes++;
        remainingBytes--;

        // Remaining bytes must be of form 10xxxxxx.
        if ((*bytes & 0xc0) != 0x80) return -1;

        value = value << 6 | (*bytes & 0x3f);
    }

    return value;
}

// Returns the number of bytes in the UTF-8 sequence starting with [byte_].
//
// If the byte is not the beginning of a UTF-8 sequence (i.e. it is a
// continuation byte), returns 0.
int wrenUtf8DecodeNumBytes(ubyte byte_) @nogc
{
    // If the byte starts with 10xxxxxx, it's the middle of a UTF-8 sequence, so
    // don't count it at all.
    if ((byte_ & 0xc0) == 0x80) return 0;

    // The first byte's high bits tell us how many bytes are in the UTF-8
    // sequence.
    if ((byte_ & 0xf8) == 0xf0) return 4;
    if ((byte_ & 0xf0) == 0xe0) return 3;
    if ((byte_ & 0xe0) == 0xc0) return 2;
    return 1;
}

// From: http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
// Returns the smallest power of two that is equal to or greater than [n].
//
// An [n] of 0 yields 0. Values above 2^30 have no representable result in an
// int and wrap around to a negative number.
int wrenPowerOf2Ceil(int n) @nogc
{
    // Smear the highest set bit of (n - 1) into every lower bit, then add one.
    n--;
    n |= n >> 1;
    n |= n >> 2;
    n |= n >> 4;
    n |= n >> 8;
    n |= n >> 16;
    n++;

    return n;
}

// Validates that [value] is within `[0, count)`. Also allows negative indices
// which map backwards from the end (-1 is the last element). Returns the valid
// positive index value. If invalid, returns `uint.max`.
uint wrenValidateIndex(uint count, long value) @nogc
{
    // Negative indices count from the end.
    if (value < 0) value = count + value;

    // Check bounds.
    if (value >= 0 && value < count) return cast(uint)value;

    return uint.max;
}