next
See std.json.Token for documentation of this function.
Function parameters
Parameters
- self:*@This()
The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar:
Types
- Token
- The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar:
- TokenType
- This is only used in `peekNextTokenType()` and gives a categorization based on the first byte of the next token that will be emitted from a `next*()` call.
- Diagnostics
- To enable diagnostics, declare `var diagnostics = Diagnostics{};` then call `source.enableDiagnostics(&diagnostics);`
- AllocWhen
- See the documentation for `std.json.Token`.
- Reader
- All `next*()` methods here handle `error.BufferUnderrun` from `std.json.Scanner`, and then read from the reader.
The allocator is only used to track `[]` and `{}` nesting levels.
Functions
- initStreaming
- The allocator is only used to track `[]` and `{}` nesting levels.
- initCompleteInput
- Use this if your input is a single slice.
- feedInput
- Call this whenever you get `error.BufferUnderrun` from `next()`.
- endInput
- Call this when you will no longer call `feedInput()`.
- nextAlloc
- Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
- nextAllocMax
- This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
- allocNextIntoArrayList
- Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
- allocNextIntoArrayListMax
- The next token type must be either `.number` or `.string`.
- skipValue
- This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
- skipUntilStackHeight
- Skip tokens until an `.object_end` or `.array_end` token results in a `stackHeight()` equal to the given stack height.
- stackHeight
- The depth of `{}` or `[]` nesting levels at the current position.
- ensureTotalStackCapacity
- Preallocate memory to hold the given number of nesting levels.
- next
- See `std.json.Token` for documentation of this function.
- peekNextTokenType
- Seeks ahead in the input until the first byte of the next token (or the end of the input)
- validate
- Scan the input and check for malformed JSON.
- isNumberFormattedLikeAnInteger
- For the slice you get from a `Token.number` or `Token.allocated_number`,
Error sets in this namespace
Error Sets
- Error
- The parsing errors are divided into two categories:
Used by `json.reader`.
Values
- default_buffer_size
- Used by `json.reader`.
- default_max_value_len
- For security, the maximum size allocated to store a single string or number value is limited to 4MiB by default.
Source
Implementation
/// Advances the scanner's state machine and returns the next token.
/// See `std.json.Token` for documentation of the tokens this function returns.
///
/// The function loops over `self.state`. Each state arm either returns a token
/// or an error, or updates `self.state` (and usually `self.cursor`) and
/// re-enters the loop with `continue :state_loop`.
///
/// Errors produced directly in this function:
/// - `error.SyntaxError` when the current byte is not valid in the current state.
/// - `error.UnexpectedEndOfInput` / `error.BufferUnderrun` when the `.string`
///   state runs out of buffered input.
/// Additional errors may propagate from the helper calls (`try self.stack.push`,
/// `skipWhitespace*`, `expectByte`, `endOfBufferIn*`), which are defined elsewhere.
pub fn next(self: *@This()) NextError!Token {
state_loop: while (true) {
switch (self.state) {
// Start of a JSON value: dispatch on the first non-whitespace byte.
.value => {
switch (try self.skipWhitespaceExpectByte()) {
// Object, Array
'{' => {
// Record the container kind on the nesting stack so the matching
// closing byte can be checked in `.post_value`.
try self.stack.push(OBJECT_MODE);
self.cursor += 1;
self.state = .object_start;
return .object_begin;
},
'[' => {
try self.stack.push(ARRAY_MODE);
self.cursor += 1;
self.state = .array_start;
return .array_begin;
},
// String
'"' => {
// Skip the opening quote; the value slice starts at the first content byte.
self.cursor += 1;
self.value_start = self.cursor;
self.state = .string;
continue :state_loop;
},
// Number
// For numbers the slice starts at the first byte of the number itself
// (digit or '-'), so `value_start` is set before the cursor advances.
'1'...'9' => {
self.value_start = self.cursor;
self.cursor += 1;
self.state = .number_int;
continue :state_loop;
},
'0' => {
self.value_start = self.cursor;
self.cursor += 1;
self.state = .number_leading_zero;
continue :state_loop;
},
'-' => {
self.value_start = self.cursor;
self.cursor += 1;
self.state = .number_minus;
continue :state_loop;
},
// literal values
// `true`, `false` and `null` are matched one byte per state (`.literal_*`).
't' => {
self.cursor += 1;
self.state = .literal_t;
continue :state_loop;
},
'f' => {
self.cursor += 1;
self.state = .literal_f;
continue :state_loop;
},
'n' => {
self.cursor += 1;
self.state = .literal_n;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
// A complete value (or an object key string) has just been consumed.
.post_value => {
// NOTE(review): `skipWhitespaceCheckEnd()` is defined elsewhere; a `true`
// result is treated here as "the document is finished".
if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
const c = self.input[self.cursor];
// If the string just scanned was an object key, the only valid next byte is ':'.
if (self.string_is_object_key) {
self.string_is_object_key = false;
switch (c) {
':' => {
self.cursor += 1;
self.state = .value;
continue :state_loop;
},
else => return error.SyntaxError,
}
}
// Otherwise expect a container terminator or a ',' separator.
switch (c) {
'}' => {
// The closing byte must match the innermost open container.
if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError;
self.cursor += 1;
// stay in .post_value state.
return .object_end;
},
']' => {
if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError;
self.cursor += 1;
// stay in .post_value state.
return .array_end;
},
',' => {
// What may follow a comma depends on the enclosing container:
// a key inside an object, any value inside an array.
switch (self.stack.peek()) {
OBJECT_MODE => {
self.state = .object_post_comma;
},
ARRAY_MODE => {
self.state = .value;
},
}
self.cursor += 1;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
// Immediately after '{': either the first key or an empty object's '}'.
.object_start => {
switch (try self.skipWhitespaceExpectByte()) {
'"' => {
self.cursor += 1;
self.value_start = self.cursor;
self.state = .string;
// Remember that this string is a key so `.post_value` requires ':' next.
self.string_is_object_key = true;
continue :state_loop;
},
'}' => {
self.cursor += 1;
// The entry being popped was pushed by the '{' arm of `.value`, so no mode check is needed.
_ = self.stack.pop();
self.state = .post_value;
return .object_end;
},
else => return error.SyntaxError,
}
},
// After ',' inside an object: only a key string is accepted ('}' is rejected here).
.object_post_comma => {
switch (try self.skipWhitespaceExpectByte()) {
'"' => {
self.cursor += 1;
self.value_start = self.cursor;
self.state = .string;
self.string_is_object_key = true;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
// Immediately after '[': either an empty array's ']' or the first element.
.array_start => {
switch (try self.skipWhitespaceExpectByte()) {
']' => {
self.cursor += 1;
_ = self.stack.pop();
self.state = .post_value;
return .array_end;
},
else => {
// Do not consume the byte; let `.value` re-examine it.
self.state = .value;
continue :state_loop;
},
}
},
// Number states.
// NOTE(review): `endOfBufferInNumber(bool)` is defined elsewhere. It is called with
// `true` from states where the bytes consumed so far form a complete number, and with
// `false` from states where at least one more byte is required — presumably the flag
// tells it whether ending the number here is acceptable; confirm against the helper.
// After '-': a digit is required.
.number_minus => {
if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
switch (self.input[self.cursor]) {
'0' => {
self.cursor += 1;
self.state = .number_leading_zero;
continue :state_loop;
},
'1'...'9' => {
self.cursor += 1;
self.state = .number_int;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
// After a leading '0': no further integer digits are consumed; only '.', 'e'/'E',
// or the end of the number may follow.
.number_leading_zero => {
if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true);
switch (self.input[self.cursor]) {
'.' => {
self.cursor += 1;
self.state = .number_post_dot;
continue :state_loop;
},
'e', 'E' => {
self.cursor += 1;
self.state = .number_post_e;
continue :state_loop;
},
else => {
// Any other byte ends the number; it is left unconsumed for `.post_value`.
self.state = .post_value;
return Token{ .number = self.takeValueSlice() };
},
}
},
// Integer digits after a leading '1'...'9'.
.number_int => {
while (self.cursor < self.input.len) : (self.cursor += 1) {
switch (self.input[self.cursor]) {
// Unlabeled `continue` targets the inner `while`, which advances the cursor.
'0'...'9' => continue,
'.' => {
// The labeled `continue` skips the inner loop's `cursor += 1`,
// so the byte is consumed explicitly here.
self.cursor += 1;
self.state = .number_post_dot;
continue :state_loop;
},
'e', 'E' => {
self.cursor += 1;
self.state = .number_post_e;
continue :state_loop;
},
else => {
self.state = .post_value;
return Token{ .number = self.takeValueSlice() };
},
}
}
return self.endOfBufferInNumber(true);
},
// After '.': at least one fraction digit is required.
.number_post_dot => {
if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
switch (self.input[self.cursor]) {
'0'...'9' => {
self.cursor += 1;
self.state = .number_frac;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
// Remaining fraction digits.
.number_frac => {
while (self.cursor < self.input.len) : (self.cursor += 1) {
switch (self.input[self.cursor]) {
'0'...'9' => continue,
'e', 'E' => {
self.cursor += 1;
self.state = .number_post_e;
continue :state_loop;
},
else => {
self.state = .post_value;
return Token{ .number = self.takeValueSlice() };
},
}
}
return self.endOfBufferInNumber(true);
},
// After 'e'/'E': an optional sign, then at least one exponent digit.
.number_post_e => {
if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
switch (self.input[self.cursor]) {
'0'...'9' => {
self.cursor += 1;
self.state = .number_exp;
continue :state_loop;
},
'+', '-' => {
self.cursor += 1;
self.state = .number_post_e_sign;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
// After the exponent sign: a digit is required.
.number_post_e_sign => {
if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
switch (self.input[self.cursor]) {
'0'...'9' => {
self.cursor += 1;
self.state = .number_exp;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
// Remaining exponent digits.
.number_exp => {
while (self.cursor < self.input.len) : (self.cursor += 1) {
switch (self.input[self.cursor]) {
'0'...'9' => continue,
else => {
self.state = .post_value;
return Token{ .number = self.takeValueSlice() };
},
}
}
return self.endOfBufferInNumber(true);
},
// Inside a string, between escape sequences and multi-byte UTF-8 sequences.
.string => {
while (self.cursor < self.input.len) : (self.cursor += 1) {
switch (self.input[self.cursor]) {
0...0x1f => return error.SyntaxError, // Bare ASCII control code in string.
// ASCII plain text.
0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue,
// Special characters.
'"' => {
// Take the slice before consuming the closing quote so the quote is not
// part of the returned string.
const result = Token{ .string = self.takeValueSlice() };
self.cursor += 1;
self.state = .post_value;
return result;
},
'\\' => {
// Emit any plain text accumulated before the escape as a partial string;
// the escape itself is handled on the next pass/call via `.string_backslash`.
const slice = self.takeValueSlice();
self.cursor += 1;
self.state = .string_backslash;
if (slice.len > 0) return Token{ .partial_string = slice };
continue :state_loop;
},
// UTF-8 validation.
// See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
// Lead bytes select a state that validates the expected continuation bytes.
// Two-byte sequence lead.
0xC2...0xDF => {
self.cursor += 1;
self.state = .string_utf8_last_byte;
continue :state_loop;
},
// Three-byte sequence leads.
0xE0 => {
self.cursor += 1;
self.state = .string_utf8_second_to_last_byte_guard_against_overlong;
continue :state_loop;
},
0xE1...0xEC, 0xEE...0xEF => {
self.cursor += 1;
self.state = .string_utf8_second_to_last_byte;
continue :state_loop;
},
0xED => {
self.cursor += 1;
self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half;
continue :state_loop;
},
// Four-byte sequence leads.
0xF0 => {
self.cursor += 1;
self.state = .string_utf8_third_to_last_byte_guard_against_overlong;
continue :state_loop;
},
0xF1...0xF3 => {
self.cursor += 1;
self.state = .string_utf8_third_to_last_byte;
continue :state_loop;
},
0xF4 => {
self.cursor += 1;
self.state = .string_utf8_third_to_last_byte_guard_against_too_large;
continue :state_loop;
},
0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8.
}
}
// The buffer ended inside the string.
if (self.is_end_of_input) return error.UnexpectedEndOfInput;
// Hand back whatever plain text was buffered before asking for more input.
const slice = self.takeValueSlice();
if (slice.len > 0) return Token{ .partial_string = slice };
return error.BufferUnderrun;
},
// After '\' inside a string: decode the escape character.
// NOTE(review): `endOfBufferInString()` is defined elsewhere; it is used by every
// mid-string state below when the buffer is exhausted.
.string_backslash => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
'"', '\\', '/' => {
// Since these characters now represent themselves literally,
// we can simply begin the next plaintext slice here.
self.value_start = self.cursor;
self.cursor += 1;
self.state = .string;
continue :state_loop;
},
// Single-character escapes: return the decoded byte as its own token and
// start the next plain-text slice after the escape character.
'b' => {
self.cursor += 1;
self.value_start = self.cursor;
self.state = .string;
return Token{ .partial_string_escaped_1 = [_]u8{0x08} };
},
'f' => {
self.cursor += 1;
self.value_start = self.cursor;
self.state = .string;
return Token{ .partial_string_escaped_1 = [_]u8{0x0c} };
},
'n' => {
self.cursor += 1;
self.value_start = self.cursor;
self.state = .string;
return Token{ .partial_string_escaped_1 = [_]u8{'\n'} };
},
'r' => {
self.cursor += 1;
self.value_start = self.cursor;
self.state = .string;
return Token{ .partial_string_escaped_1 = [_]u8{'\r'} };
},
't' => {
self.cursor += 1;
self.value_start = self.cursor;
self.state = .string;
return Token{ .partial_string_escaped_1 = [_]u8{'\t'} };
},
'u' => {
self.cursor += 1;
self.state = .string_backslash_u;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
// `\uXXXX`: the four hex digits are accumulated into `utf16_code_units[0]`,
// most significant nibble first (shifts of 12, 8, 4, 0), one digit per state.
.string_backslash_u => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
// First digit uses `=` (not `|=`) so any previous value is overwritten.
self.utf16_code_units[0] = @as(u16, c - '0') << 12;
},
'A'...'F' => {
self.utf16_code_units[0] = @as(u16, c - 'A' + 10) << 12;
},
'a'...'f' => {
self.utf16_code_units[0] = @as(u16, c - 'a' + 10) << 12;
},
else => return error.SyntaxError,
}
self.cursor += 1;
self.state = .string_backslash_u_1;
continue :state_loop;
},
.string_backslash_u_1 => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.utf16_code_units[0] |= @as(u16, c - '0') << 8;
},
'A'...'F' => {
self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 8;
},
'a'...'f' => {
self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 8;
},
else => return error.SyntaxError,
}
self.cursor += 1;
self.state = .string_backslash_u_2;
continue :state_loop;
},
.string_backslash_u_2 => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.utf16_code_units[0] |= @as(u16, c - '0') << 4;
},
'A'...'F' => {
self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 4;
},
'a'...'f' => {
self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 4;
},
else => return error.SyntaxError,
}
self.cursor += 1;
self.state = .string_backslash_u_3;
continue :state_loop;
},
// Last hex digit: the code unit is now complete and can be classified.
.string_backslash_u_3 => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.utf16_code_units[0] |= c - '0';
},
'A'...'F' => {
self.utf16_code_units[0] |= c - 'A' + 10;
},
'a'...'f' => {
self.utf16_code_units[0] |= c - 'a' + 10;
},
else => return error.SyntaxError,
}
self.cursor += 1;
if (std.unicode.utf16IsHighSurrogate(self.utf16_code_units[0])) {
// A high surrogate must be followed by a second `\u` escape holding the low half.
self.state = .string_surrogate_half;
continue :state_loop;
} else if (std.unicode.utf16IsLowSurrogate(self.utf16_code_units[0])) {
return error.SyntaxError; // Unexpected low surrogate half.
} else {
// Not a surrogate: emit the code point and resume plain-text scanning after the escape.
self.value_start = self.cursor;
self.state = .string;
return partialStringCodepoint(self.utf16_code_units[0]);
}
},
// After a high surrogate escape: require a literal `\u` followed by four hex digits
// whose first digit is 'D' and second is 'C'...'F' (i.e. 0xDC00...0xDFFF), accumulated
// into `utf16_code_units[1]`.
.string_surrogate_half => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
'\\' => {
self.cursor += 1;
self.state = .string_surrogate_half_backslash;
continue :state_loop;
},
else => return error.SyntaxError, // Expected low surrogate half.
}
},
.string_surrogate_half_backslash => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
'u' => {
self.cursor += 1;
self.state = .string_surrogate_half_backslash_u;
continue :state_loop;
},
else => return error.SyntaxError, // Expected low surrogate half.
}
},
.string_surrogate_half_backslash_u => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
'D', 'd' => {
self.cursor += 1;
self.utf16_code_units[1] = 0xD << 12;
self.state = .string_surrogate_half_backslash_u_1;
continue :state_loop;
},
else => return error.SyntaxError, // Expected low surrogate half.
}
},
.string_surrogate_half_backslash_u_1 => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
const c = self.input[self.cursor];
switch (c) {
// Only C...F are accepted for the second digit; 0...B would not be a low surrogate.
'C'...'F' => {
self.cursor += 1;
self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 8;
self.state = .string_surrogate_half_backslash_u_2;
continue :state_loop;
},
'c'...'f' => {
self.cursor += 1;
self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 8;
self.state = .string_surrogate_half_backslash_u_2;
continue :state_loop;
},
else => return error.SyntaxError, // Expected low surrogate half.
}
},
.string_surrogate_half_backslash_u_2 => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.cursor += 1;
self.utf16_code_units[1] |= @as(u16, c - '0') << 4;
self.state = .string_surrogate_half_backslash_u_3;
continue :state_loop;
},
'A'...'F' => {
self.cursor += 1;
self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 4;
self.state = .string_surrogate_half_backslash_u_3;
continue :state_loop;
},
'a'...'f' => {
self.cursor += 1;
self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 4;
self.state = .string_surrogate_half_backslash_u_3;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
.string_surrogate_half_backslash_u_3 => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
const c = self.input[self.cursor];
switch (c) {
'0'...'9' => {
self.utf16_code_units[1] |= c - '0';
},
'A'...'F' => {
self.utf16_code_units[1] |= c - 'A' + 10;
},
'a'...'f' => {
self.utf16_code_units[1] |= c - 'a' + 10;
},
else => return error.SyntaxError,
}
self.cursor += 1;
self.value_start = self.cursor;
self.state = .string;
// `catch unreachable`: the preceding states only reach this point with a high
// surrogate in [0] and a 0xDC00...0xDFFF unit in [1], so decoding is expected to succeed.
const code_point = std.unicode.utf16DecodeSurrogatePair(&self.utf16_code_units) catch unreachable;
return partialStringCodepoint(code_point);
},
// UTF-8 continuation-byte states. The state name says how many bytes remain; the
// "guard" variants narrow the accepted range of the first continuation byte.
// Note that `value_start` is not touched here, so validated multi-byte sequences
// stay inside the current plain-text slice.
.string_utf8_last_byte => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
0x80...0xBF => {
self.cursor += 1;
self.state = .string;
continue :state_loop;
},
else => return error.SyntaxError, // Invalid UTF-8.
}
},
.string_utf8_second_to_last_byte => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
0x80...0xBF => {
self.cursor += 1;
self.state = .string_utf8_last_byte;
continue :state_loop;
},
else => return error.SyntaxError, // Invalid UTF-8.
}
},
// After lead byte 0xE0: only 0xA0...0xBF is accepted.
.string_utf8_second_to_last_byte_guard_against_overlong => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
0xA0...0xBF => {
self.cursor += 1;
self.state = .string_utf8_last_byte;
continue :state_loop;
},
else => return error.SyntaxError, // Invalid UTF-8.
}
},
// After lead byte 0xED: only 0x80...0x9F is accepted.
.string_utf8_second_to_last_byte_guard_against_surrogate_half => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
0x80...0x9F => {
self.cursor += 1;
self.state = .string_utf8_last_byte;
continue :state_loop;
},
else => return error.SyntaxError, // Invalid UTF-8.
}
},
.string_utf8_third_to_last_byte => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
0x80...0xBF => {
self.cursor += 1;
self.state = .string_utf8_second_to_last_byte;
continue :state_loop;
},
else => return error.SyntaxError, // Invalid UTF-8.
}
},
// After lead byte 0xF0: only 0x90...0xBF is accepted.
.string_utf8_third_to_last_byte_guard_against_overlong => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
0x90...0xBF => {
self.cursor += 1;
self.state = .string_utf8_second_to_last_byte;
continue :state_loop;
},
else => return error.SyntaxError, // Invalid UTF-8.
}
},
// After lead byte 0xF4: only 0x80...0x8F is accepted.
.string_utf8_third_to_last_byte_guard_against_too_large => {
if (self.cursor >= self.input.len) return self.endOfBufferInString();
switch (self.input[self.cursor]) {
0x80...0x8F => {
self.cursor += 1;
self.state = .string_utf8_second_to_last_byte;
continue :state_loop;
},
else => return error.SyntaxError, // Invalid UTF-8.
}
},
// Literal states: each state name lists the bytes already matched, and the arm
// checks the next expected byte. The final byte of each literal returns the token.
// NOTE(review): `expectByte()` is defined elsewhere; presumably it reports
// end-of-buffer conditions as errors — confirm against the helper.
.literal_t => {
switch (try self.expectByte()) {
'r' => {
self.cursor += 1;
self.state = .literal_tr;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
.literal_tr => {
switch (try self.expectByte()) {
'u' => {
self.cursor += 1;
self.state = .literal_tru;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
.literal_tru => {
switch (try self.expectByte()) {
'e' => {
self.cursor += 1;
self.state = .post_value;
return .true;
},
else => return error.SyntaxError,
}
},
.literal_f => {
switch (try self.expectByte()) {
'a' => {
self.cursor += 1;
self.state = .literal_fa;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
.literal_fa => {
switch (try self.expectByte()) {
'l' => {
self.cursor += 1;
self.state = .literal_fal;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
.literal_fal => {
switch (try self.expectByte()) {
's' => {
self.cursor += 1;
self.state = .literal_fals;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
.literal_fals => {
switch (try self.expectByte()) {
'e' => {
self.cursor += 1;
self.state = .post_value;
return .false;
},
else => return error.SyntaxError,
}
},
.literal_n => {
switch (try self.expectByte()) {
'u' => {
self.cursor += 1;
self.state = .literal_nu;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
.literal_nu => {
switch (try self.expectByte()) {
'l' => {
self.cursor += 1;
self.state = .literal_nul;
continue :state_loop;
},
else => return error.SyntaxError,
}
},
.literal_nul => {
switch (try self.expectByte()) {
'l' => {
self.cursor += 1;
self.state = .post_value;
return .null;
},
else => return error.SyntaxError,
}
},
}
// Every switch arm above either returns or continues `state_loop`,
// so control never falls out of the switch.
unreachable;
}
}