scannerc.go

   1//
   2// Copyright (c) 2011-2019 Canonical Ltd
   3// Copyright (c) 2006-2010 Kirill Simonov
   4//
   5// Permission is hereby granted, free of charge, to any person obtaining a copy of
   6// this software and associated documentation files (the "Software"), to deal in
   7// the Software without restriction, including without limitation the rights to
   8// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9// of the Software, and to permit persons to whom the Software is furnished to do
  10// so, subject to the following conditions:
  11//
  12// The above copyright notice and this permission notice shall be included in all
  13// copies or substantial portions of the Software.
  14//
  15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21// SOFTWARE.
  22
  23package yaml
  24
  25import (
  26	"bytes"
  27	"fmt"
  28)
  29
  30// Introduction
  31// ************
  32//
// The following notes assume that you are familiar with the YAML specification
// (http://yaml.org/spec/1.2/spec.html).  We mostly follow it, although in
// some cases we are less restrictive than it requires.
  36//
// The process of transforming a YAML stream into a sequence of events is
// divided into two steps: Scanning and Parsing.
  39//
// The Scanner transforms the input stream into a sequence of tokens, while the
// Parser transforms the sequence of tokens produced by the Scanner into a
// sequence of parsing events.
  43//
// The Scanner is rather clever and complicated. The Parser, on the contrary,
// is a straightforward implementation of a recursive-descent parser (or,
// LL(1) parser, as it is usually called).
  47//
// Actually there are two issues of Scanning that might be called "clever"; the
// rest is quite straightforward.  The issues are "block collection start" and
// "simple keys".  Both issues are explained below in detail.
  51//
  52// Here the Scanning step is explained and implemented.  We start with the list
  53// of all the tokens produced by the Scanner together with short descriptions.
  54//
  55// Now, tokens:
  56//
  57//      STREAM-START(encoding)          # The stream start.
  58//      STREAM-END                      # The stream end.
  59//      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
  60//      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
  61//      DOCUMENT-START                  # '---'
  62//      DOCUMENT-END                    # '...'
  63//      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
  64//      BLOCK-MAPPING-START             # sequence or a block mapping.
  65//      BLOCK-END                       # Indentation decrease.
  66//      FLOW-SEQUENCE-START             # '['
  67//      FLOW-SEQUENCE-END               # ']'
//      FLOW-MAPPING-START              # '{'
//      FLOW-MAPPING-END                # '}'
  70//      BLOCK-ENTRY                     # '-'
  71//      FLOW-ENTRY                      # ','
  72//      KEY                             # '?' or nothing (simple keys).
  73//      VALUE                           # ':'
  74//      ALIAS(anchor)                   # '*anchor'
  75//      ANCHOR(anchor)                  # '&anchor'
  76//      TAG(handle,suffix)              # '!handle!suffix'
  77//      SCALAR(value,style)             # A scalar.
  78//
  79// The following two tokens are "virtual" tokens denoting the beginning and the
  80// end of the stream:
  81//
  82//      STREAM-START(encoding)
  83//      STREAM-END
  84//
  85// We pass the information about the input stream encoding with the
  86// STREAM-START token.
  87//
// The next two tokens are responsible for directives:
  89//
  90//      VERSION-DIRECTIVE(major,minor)
  91//      TAG-DIRECTIVE(handle,prefix)
  92//
  93// Example:
  94//
  95//      %YAML   1.1
  96//      %TAG    !   !foo
  97//      %TAG    !yaml!  tag:yaml.org,2002:
  98//      ---
  99//
// The corresponding sequence of tokens:
 101//
 102//      STREAM-START(utf-8)
 103//      VERSION-DIRECTIVE(1,1)
 104//      TAG-DIRECTIVE("!","!foo")
//      TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:")
 106//      DOCUMENT-START
 107//      STREAM-END
 108//
 109// Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
 110// line.
 111//
 112// The document start and end indicators are represented by:
 113//
 114//      DOCUMENT-START
 115//      DOCUMENT-END
 116//
 117// Note that if a YAML stream contains an implicit document (without '---'
 118// and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
 119// produced.
 120//
 121// In the following examples, we present whole documents together with the
 122// produced tokens.
 123//
 124//      1. An implicit document:
 125//
 126//          'a scalar'
 127//
 128//      Tokens:
 129//
 130//          STREAM-START(utf-8)
 131//          SCALAR("a scalar",single-quoted)
 132//          STREAM-END
 133//
 134//      2. An explicit document:
 135//
 136//          ---
 137//          'a scalar'
 138//          ...
 139//
 140//      Tokens:
 141//
 142//          STREAM-START(utf-8)
 143//          DOCUMENT-START
 144//          SCALAR("a scalar",single-quoted)
 145//          DOCUMENT-END
 146//          STREAM-END
 147//
 148//      3. Several documents in a stream:
 149//
 150//          'a scalar'
 151//          ---
 152//          'another scalar'
 153//          ---
 154//          'yet another scalar'
 155//
 156//      Tokens:
 157//
 158//          STREAM-START(utf-8)
 159//          SCALAR("a scalar",single-quoted)
 160//          DOCUMENT-START
 161//          SCALAR("another scalar",single-quoted)
 162//          DOCUMENT-START
 163//          SCALAR("yet another scalar",single-quoted)
 164//          STREAM-END
 165//
 166// We have already introduced the SCALAR token above.  The following tokens are
 167// used to describe aliases, anchors, tag, and scalars:
 168//
 169//      ALIAS(anchor)
 170//      ANCHOR(anchor)
 171//      TAG(handle,suffix)
 172//      SCALAR(value,style)
 173//
 174// The following series of examples illustrate the usage of these tokens:
 175//
 176//      1. A recursive sequence:
 177//
 178//          &A [ *A ]
 179//
 180//      Tokens:
 181//
 182//          STREAM-START(utf-8)
 183//          ANCHOR("A")
 184//          FLOW-SEQUENCE-START
 185//          ALIAS("A")
 186//          FLOW-SEQUENCE-END
 187//          STREAM-END
 188//
 189//      2. A tagged scalar:
 190//
 191//          !!float "3.14"  # A good approximation.
 192//
 193//      Tokens:
 194//
 195//          STREAM-START(utf-8)
 196//          TAG("!!","float")
 197//          SCALAR("3.14",double-quoted)
 198//          STREAM-END
 199//
 200//      3. Various scalar styles:
 201//
 202//          --- # Implicit empty plain scalars do not produce tokens.
 203//          --- a plain scalar
 204//          --- 'a single-quoted scalar'
 205//          --- "a double-quoted scalar"
 206//          --- |-
 207//            a literal scalar
 208//          --- >-
 209//            a folded
 210//            scalar
 211//
 212//      Tokens:
 213//
 214//          STREAM-START(utf-8)
 215//          DOCUMENT-START
 216//          DOCUMENT-START
 217//          SCALAR("a plain scalar",plain)
 218//          DOCUMENT-START
 219//          SCALAR("a single-quoted scalar",single-quoted)
 220//          DOCUMENT-START
 221//          SCALAR("a double-quoted scalar",double-quoted)
 222//          DOCUMENT-START
 223//          SCALAR("a literal scalar",literal)
 224//          DOCUMENT-START
 225//          SCALAR("a folded scalar",folded)
 226//          STREAM-END
 227//
 228// Now it's time to review collection-related tokens. We will start with
 229// flow collections:
 230//
 231//      FLOW-SEQUENCE-START
 232//      FLOW-SEQUENCE-END
 233//      FLOW-MAPPING-START
 234//      FLOW-MAPPING-END
 235//      FLOW-ENTRY
 236//      KEY
 237//      VALUE
 238//
// The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
// FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
// respectively.  FLOW-ENTRY represents the ',' indicator.  Finally the
// indicators '?' and ':', which are used for denoting mapping keys and values,
// are represented by the KEY and VALUE tokens.
 244//
 245// The following examples show flow collections:
 246//
 247//      1. A flow sequence:
 248//
 249//          [item 1, item 2, item 3]
 250//
 251//      Tokens:
 252//
 253//          STREAM-START(utf-8)
 254//          FLOW-SEQUENCE-START
 255//          SCALAR("item 1",plain)
 256//          FLOW-ENTRY
 257//          SCALAR("item 2",plain)
 258//          FLOW-ENTRY
 259//          SCALAR("item 3",plain)
 260//          FLOW-SEQUENCE-END
 261//          STREAM-END
 262//
 263//      2. A flow mapping:
 264//
 265//          {
 266//              a simple key: a value,  # Note that the KEY token is produced.
 267//              ? a complex key: another value,
 268//          }
 269//
 270//      Tokens:
 271//
 272//          STREAM-START(utf-8)
 273//          FLOW-MAPPING-START
 274//          KEY
 275//          SCALAR("a simple key",plain)
 276//          VALUE
 277//          SCALAR("a value",plain)
 278//          FLOW-ENTRY
 279//          KEY
 280//          SCALAR("a complex key",plain)
 281//          VALUE
 282//          SCALAR("another value",plain)
 283//          FLOW-ENTRY
 284//          FLOW-MAPPING-END
 285//          STREAM-END
 286//
// A simple key is a key which is not denoted by the '?' indicator.  Note that
// the Scanner still produces the KEY token whenever it encounters a simple key.
 289//
 290// For scanning block collections, the following tokens are used (note that we
 291// repeat KEY and VALUE here):
 292//
 293//      BLOCK-SEQUENCE-START
 294//      BLOCK-MAPPING-START
 295//      BLOCK-END
 296//      BLOCK-ENTRY
 297//      KEY
 298//      VALUE
 299//
// The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
// increase that precedes a block collection (cf. the INDENT token in Python).
// The token BLOCK-END denotes indentation decrease that ends a block collection
// (cf. the DEDENT token in Python).  However YAML has some syntax peculiarities
// that make detection of these tokens more complex.
 305//
 306// The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
 307// '-', '?', and ':' correspondingly.
 308//
 309// The following examples show how the tokens BLOCK-SEQUENCE-START,
 310// BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
 311//
 312//      1. Block sequences:
 313//
 314//          - item 1
 315//          - item 2
 316//          -
 317//            - item 3.1
 318//            - item 3.2
 319//          -
 320//            key 1: value 1
 321//            key 2: value 2
 322//
 323//      Tokens:
 324//
 325//          STREAM-START(utf-8)
 326//          BLOCK-SEQUENCE-START
 327//          BLOCK-ENTRY
 328//          SCALAR("item 1",plain)
 329//          BLOCK-ENTRY
 330//          SCALAR("item 2",plain)
 331//          BLOCK-ENTRY
 332//          BLOCK-SEQUENCE-START
 333//          BLOCK-ENTRY
 334//          SCALAR("item 3.1",plain)
 335//          BLOCK-ENTRY
 336//          SCALAR("item 3.2",plain)
 337//          BLOCK-END
 338//          BLOCK-ENTRY
 339//          BLOCK-MAPPING-START
 340//          KEY
 341//          SCALAR("key 1",plain)
 342//          VALUE
 343//          SCALAR("value 1",plain)
 344//          KEY
 345//          SCALAR("key 2",plain)
 346//          VALUE
 347//          SCALAR("value 2",plain)
 348//          BLOCK-END
 349//          BLOCK-END
 350//          STREAM-END
 351//
 352//      2. Block mappings:
 353//
 354//          a simple key: a value   # The KEY token is produced here.
 355//          ? a complex key
 356//          : another value
 357//          a mapping:
 358//            key 1: value 1
 359//            key 2: value 2
 360//          a sequence:
 361//            - item 1
 362//            - item 2
 363//
 364//      Tokens:
 365//
 366//          STREAM-START(utf-8)
 367//          BLOCK-MAPPING-START
 368//          KEY
 369//          SCALAR("a simple key",plain)
 370//          VALUE
 371//          SCALAR("a value",plain)
 372//          KEY
 373//          SCALAR("a complex key",plain)
 374//          VALUE
 375//          SCALAR("another value",plain)
 376//          KEY
//          SCALAR("a mapping",plain)
//          VALUE
 378//          BLOCK-MAPPING-START
 379//          KEY
 380//          SCALAR("key 1",plain)
 381//          VALUE
 382//          SCALAR("value 1",plain)
 383//          KEY
 384//          SCALAR("key 2",plain)
 385//          VALUE
 386//          SCALAR("value 2",plain)
 387//          BLOCK-END
 388//          KEY
 389//          SCALAR("a sequence",plain)
 390//          VALUE
 391//          BLOCK-SEQUENCE-START
 392//          BLOCK-ENTRY
 393//          SCALAR("item 1",plain)
 394//          BLOCK-ENTRY
 395//          SCALAR("item 2",plain)
 396//          BLOCK-END
 397//          BLOCK-END
 398//          STREAM-END
 399//
 400// YAML does not always require to start a new block collection from a new
 401// line.  If the current line contains only '-', '?', and ':' indicators, a new
 402// block collection may start at the current line.  The following examples
 403// illustrate this case:
 404//
 405//      1. Collections in a sequence:
 406//
 407//          - - item 1
 408//            - item 2
 409//          - key 1: value 1
 410//            key 2: value 2
 411//          - ? complex key
 412//            : complex value
 413//
 414//      Tokens:
 415//
 416//          STREAM-START(utf-8)
 417//          BLOCK-SEQUENCE-START
 418//          BLOCK-ENTRY
 419//          BLOCK-SEQUENCE-START
 420//          BLOCK-ENTRY
 421//          SCALAR("item 1",plain)
 422//          BLOCK-ENTRY
 423//          SCALAR("item 2",plain)
 424//          BLOCK-END
 425//          BLOCK-ENTRY
 426//          BLOCK-MAPPING-START
 427//          KEY
 428//          SCALAR("key 1",plain)
 429//          VALUE
 430//          SCALAR("value 1",plain)
 431//          KEY
 432//          SCALAR("key 2",plain)
 433//          VALUE
 434//          SCALAR("value 2",plain)
 435//          BLOCK-END
 436//          BLOCK-ENTRY
 437//          BLOCK-MAPPING-START
 438//          KEY
//          SCALAR("complex key",plain)
//          VALUE
//          SCALAR("complex value",plain)
 442//          BLOCK-END
 443//          BLOCK-END
 444//          STREAM-END
 445//
 446//      2. Collections in a mapping:
 447//
 448//          ? a sequence
 449//          : - item 1
 450//            - item 2
 451//          ? a mapping
 452//          : key 1: value 1
 453//            key 2: value 2
 454//
 455//      Tokens:
 456//
 457//          STREAM-START(utf-8)
 458//          BLOCK-MAPPING-START
 459//          KEY
 460//          SCALAR("a sequence",plain)
 461//          VALUE
 462//          BLOCK-SEQUENCE-START
 463//          BLOCK-ENTRY
 464//          SCALAR("item 1",plain)
 465//          BLOCK-ENTRY
 466//          SCALAR("item 2",plain)
 467//          BLOCK-END
 468//          KEY
 469//          SCALAR("a mapping",plain)
 470//          VALUE
 471//          BLOCK-MAPPING-START
 472//          KEY
 473//          SCALAR("key 1",plain)
 474//          VALUE
 475//          SCALAR("value 1",plain)
 476//          KEY
 477//          SCALAR("key 2",plain)
 478//          VALUE
 479//          SCALAR("value 2",plain)
 480//          BLOCK-END
 481//          BLOCK-END
 482//          STREAM-END
 483//
 484// YAML also permits non-indented sequences if they are included into a block
 485// mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
 486//
 487//      key:
 488//      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
 489//      - item 2
 490//
 491// Tokens:
 492//
 493//      STREAM-START(utf-8)
 494//      BLOCK-MAPPING-START
 495//      KEY
 496//      SCALAR("key",plain)
 497//      VALUE
 498//      BLOCK-ENTRY
 499//      SCALAR("item 1",plain)
 500//      BLOCK-ENTRY
 501//      SCALAR("item 2",plain)
//      BLOCK-END
//      STREAM-END
 503//
 504
 505// Ensure that the buffer contains the required number of characters.
 506// Return true on success, false on failure (reader error or memory error).
 507func cache(parser *yaml_parser_t, length int) bool {
 508	// [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B)
 509	return parser.unread >= length || yaml_parser_update_buffer(parser, length)
 510}
 511
 512// Advance the buffer pointer.
 513func skip(parser *yaml_parser_t) {
 514	if !is_blank(parser.buffer, parser.buffer_pos) {
 515		parser.newlines = 0
 516	}
 517	parser.mark.index++
 518	parser.mark.column++
 519	parser.unread--
 520	parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
 521}
 522
 523func skip_line(parser *yaml_parser_t) {
 524	if is_crlf(parser.buffer, parser.buffer_pos) {
 525		parser.mark.index += 2
 526		parser.mark.column = 0
 527		parser.mark.line++
 528		parser.unread -= 2
 529		parser.buffer_pos += 2
 530		parser.newlines++
 531	} else if is_break(parser.buffer, parser.buffer_pos) {
 532		parser.mark.index++
 533		parser.mark.column = 0
 534		parser.mark.line++
 535		parser.unread--
 536		parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
 537		parser.newlines++
 538	}
 539}
 540
 541// Copy a character to a string buffer and advance pointers.
 542func read(parser *yaml_parser_t, s []byte) []byte {
 543	if !is_blank(parser.buffer, parser.buffer_pos) {
 544		parser.newlines = 0
 545	}
 546	w := width(parser.buffer[parser.buffer_pos])
 547	if w == 0 {
 548		panic("invalid character sequence")
 549	}
 550	if len(s) == 0 {
 551		s = make([]byte, 0, 32)
 552	}
 553	if w == 1 && len(s)+w <= cap(s) {
 554		s = s[:len(s)+1]
 555		s[len(s)-1] = parser.buffer[parser.buffer_pos]
 556		parser.buffer_pos++
 557	} else {
 558		s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...)
 559		parser.buffer_pos += w
 560	}
 561	parser.mark.index++
 562	parser.mark.column++
 563	parser.unread--
 564	return s
 565}
 566
 567// Copy a line break character to a string buffer and advance pointers.
 568func read_line(parser *yaml_parser_t, s []byte) []byte {
 569	buf := parser.buffer
 570	pos := parser.buffer_pos
 571	switch {
 572	case buf[pos] == '\r' && buf[pos+1] == '\n':
 573		// CR LF . LF
 574		s = append(s, '\n')
 575		parser.buffer_pos += 2
 576		parser.mark.index++
 577		parser.unread--
 578	case buf[pos] == '\r' || buf[pos] == '\n':
 579		// CR|LF . LF
 580		s = append(s, '\n')
 581		parser.buffer_pos += 1
 582	case buf[pos] == '\xC2' && buf[pos+1] == '\x85':
 583		// NEL . LF
 584		s = append(s, '\n')
 585		parser.buffer_pos += 2
 586	case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'):
 587		// LS|PS . LS|PS
 588		s = append(s, buf[parser.buffer_pos:pos+3]...)
 589		parser.buffer_pos += 3
 590	default:
 591		return s
 592	}
 593	parser.mark.index++
 594	parser.mark.column = 0
 595	parser.mark.line++
 596	parser.unread--
 597	parser.newlines++
 598	return s
 599}
 600
 601// Get the next token.
 602func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool {
 603	// Erase the token object.
 604	*token = yaml_token_t{} // [Go] Is this necessary?
 605
 606	// No tokens after STREAM-END or error.
 607	if parser.stream_end_produced || parser.error != yaml_NO_ERROR {
 608		return true
 609	}
 610
 611	// Ensure that the tokens queue contains enough tokens.
 612	if !parser.token_available {
 613		if !yaml_parser_fetch_more_tokens(parser) {
 614			return false
 615		}
 616	}
 617
 618	// Fetch the next token from the queue.
 619	*token = parser.tokens[parser.tokens_head]
 620	parser.tokens_head++
 621	parser.tokens_parsed++
 622	parser.token_available = false
 623
 624	if token.typ == yaml_STREAM_END_TOKEN {
 625		parser.stream_end_produced = true
 626	}
 627	return true
 628}
 629
 630// Set the scanner error and return false.
 631func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool {
 632	parser.error = yaml_SCANNER_ERROR
 633	parser.context = context
 634	parser.context_mark = context_mark
 635	parser.problem = problem
 636	parser.problem_mark = parser.mark
 637	return false
 638}
 639
 640func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool {
 641	context := "while parsing a tag"
 642	if directive {
 643		context = "while parsing a %TAG directive"
 644	}
 645	return yaml_parser_set_scanner_error(parser, context, context_mark, problem)
 646}
 647
 648func trace(args ...interface{}) func() {
 649	pargs := append([]interface{}{"+++"}, args...)
 650	fmt.Println(pargs...)
 651	pargs = append([]interface{}{"---"}, args...)
 652	return func() { fmt.Println(pargs...) }
 653}
 654
 655// Ensure that the tokens queue contains at least one token which can be
 656// returned to the Parser.
 657func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
 658	// While we need more tokens to fetch, do it.
 659	for {
 660		// [Go] The comment parsing logic requires a lookahead of two tokens
 661		// so that foot comments may be parsed in time of associating them
 662		// with the tokens that are parsed before them, and also for line
 663		// comments to be transformed into head comments in some edge cases.
 664		if parser.tokens_head < len(parser.tokens)-2 {
 665			// If a potential simple key is at the head position, we need to fetch
 666			// the next token to disambiguate it.
 667			head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed]
 668			if !ok {
 669				break
 670			} else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok {
 671				return false
 672			} else if !valid {
 673				break
 674			}
 675		}
 676		// Fetch the next token.
 677		if !yaml_parser_fetch_next_token(parser) {
 678			return false
 679		}
 680	}
 681
 682	parser.token_available = true
 683	return true
 684}
 685
 686// The dispatcher for token fetchers.
 687func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) {
 688	// Ensure that the buffer is initialized.
 689	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
 690		return false
 691	}
 692
 693	// Check if we just started scanning.  Fetch STREAM-START then.
 694	if !parser.stream_start_produced {
 695		return yaml_parser_fetch_stream_start(parser)
 696	}
 697
 698	scan_mark := parser.mark
 699
 700	// Eat whitespaces and comments until we reach the next token.
 701	if !yaml_parser_scan_to_next_token(parser) {
 702		return false
 703	}
 704
 705	// [Go] While unrolling indents, transform the head comments of prior
 706	// indentation levels observed after scan_start into foot comments at
 707	// the respective indexes.
 708
 709	// Check the indentation level against the current column.
 710	if !yaml_parser_unroll_indent(parser, parser.mark.column, scan_mark) {
 711		return false
 712	}
 713
 714	// Ensure that the buffer contains at least 4 characters.  4 is the length
 715	// of the longest indicators ('--- ' and '... ').
 716	if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
 717		return false
 718	}
 719
 720	// Is it the end of the stream?
 721	if is_z(parser.buffer, parser.buffer_pos) {
 722		return yaml_parser_fetch_stream_end(parser)
 723	}
 724
 725	// Is it a directive?
 726	if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' {
 727		return yaml_parser_fetch_directive(parser)
 728	}
 729
 730	buf := parser.buffer
 731	pos := parser.buffer_pos
 732
 733	// Is it the document start indicator?
 734	if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) {
 735		return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN)
 736	}
 737
 738	// Is it the document end indicator?
 739	if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) {
 740		return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN)
 741	}
 742
 743	comment_mark := parser.mark
 744	if len(parser.tokens) > 0 && (parser.flow_level == 0 && buf[pos] == ':' || parser.flow_level > 0 && buf[pos] == ',') {
 745		// Associate any following comments with the prior token.
 746		comment_mark = parser.tokens[len(parser.tokens)-1].start_mark
 747	}
 748	defer func() {
 749		if !ok {
 750			return
 751		}
 752		if len(parser.tokens) > 0 && parser.tokens[len(parser.tokens)-1].typ == yaml_BLOCK_ENTRY_TOKEN {
 753			// Sequence indicators alone have no line comments. It becomes
 754			// a head comment for whatever follows.
 755			return
 756		}
 757		if !yaml_parser_scan_line_comment(parser, comment_mark) {
 758			ok = false
 759			return
 760		}
 761	}()
 762
 763	// Is it the flow sequence start indicator?
 764	if buf[pos] == '[' {
 765		return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN)
 766	}
 767
 768	// Is it the flow mapping start indicator?
 769	if parser.buffer[parser.buffer_pos] == '{' {
 770		return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN)
 771	}
 772
 773	// Is it the flow sequence end indicator?
 774	if parser.buffer[parser.buffer_pos] == ']' {
 775		return yaml_parser_fetch_flow_collection_end(parser,
 776			yaml_FLOW_SEQUENCE_END_TOKEN)
 777	}
 778
 779	// Is it the flow mapping end indicator?
 780	if parser.buffer[parser.buffer_pos] == '}' {
 781		return yaml_parser_fetch_flow_collection_end(parser,
 782			yaml_FLOW_MAPPING_END_TOKEN)
 783	}
 784
 785	// Is it the flow entry indicator?
 786	if parser.buffer[parser.buffer_pos] == ',' {
 787		return yaml_parser_fetch_flow_entry(parser)
 788	}
 789
 790	// Is it the block entry indicator?
 791	if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) {
 792		return yaml_parser_fetch_block_entry(parser)
 793	}
 794
 795	// Is it the key indicator?
 796	if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
 797		return yaml_parser_fetch_key(parser)
 798	}
 799
 800	// Is it the value indicator?
 801	if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
 802		return yaml_parser_fetch_value(parser)
 803	}
 804
 805	// Is it an alias?
 806	if parser.buffer[parser.buffer_pos] == '*' {
 807		return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN)
 808	}
 809
 810	// Is it an anchor?
 811	if parser.buffer[parser.buffer_pos] == '&' {
 812		return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN)
 813	}
 814
 815	// Is it a tag?
 816	if parser.buffer[parser.buffer_pos] == '!' {
 817		return yaml_parser_fetch_tag(parser)
 818	}
 819
 820	// Is it a literal scalar?
 821	if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 {
 822		return yaml_parser_fetch_block_scalar(parser, true)
 823	}
 824
 825	// Is it a folded scalar?
 826	if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 {
 827		return yaml_parser_fetch_block_scalar(parser, false)
 828	}
 829
 830	// Is it a single-quoted scalar?
 831	if parser.buffer[parser.buffer_pos] == '\'' {
 832		return yaml_parser_fetch_flow_scalar(parser, true)
 833	}
 834
 835	// Is it a double-quoted scalar?
 836	if parser.buffer[parser.buffer_pos] == '"' {
 837		return yaml_parser_fetch_flow_scalar(parser, false)
 838	}
 839
 840	// Is it a plain scalar?
 841	//
 842	// A plain scalar may start with any non-blank characters except
 843	//
 844	//      '-', '?', ':', ',', '[', ']', '{', '}',
 845	//      '#', '&', '*', '!', '|', '>', '\'', '\"',
 846	//      '%', '@', '`'.
 847	//
 848	// In the block context (and, for the '-' indicator, in the flow context
 849	// too), it may also start with the characters
 850	//
 851	//      '-', '?', ':'
 852	//
 853	// if it is followed by a non-space character.
 854	//
 855	// The last rule is more restrictive than the specification requires.
 856	// [Go] TODO Make this logic more reasonable.
 857	//switch parser.buffer[parser.buffer_pos] {
 858	//case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`':
 859	//}
 860	if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' ||
 861		parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' ||
 862		parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' ||
 863		parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
 864		parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' ||
 865		parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' ||
 866		parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' ||
 867		parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' ||
 868		parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' ||
 869		parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') ||
 870		(parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) ||
 871		(parser.flow_level == 0 &&
 872			(parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') &&
 873			!is_blankz(parser.buffer, parser.buffer_pos+1)) {
 874		return yaml_parser_fetch_plain_scalar(parser)
 875	}
 876
 877	// If we don't determine the token type so far, it is an error.
 878	return yaml_parser_set_scanner_error(parser,
 879		"while scanning for the next token", parser.mark,
 880		"found character that cannot start any token")
 881}
 882
 883func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) {
 884	if !simple_key.possible {
 885		return false, true
 886	}
 887
 888	// The 1.2 specification says:
 889	//
 890	//     "If the ? indicator is omitted, parsing needs to see past the
 891	//     implicit key to recognize it as such. To limit the amount of
 892	//     lookahead required, the “:” indicator must appear at most 1024
 893	//     Unicode characters beyond the start of the key. In addition, the key
 894	//     is restricted to a single line."
 895	//
 896	if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {
 897		// Check if the potential simple key to be removed is required.
 898		if simple_key.required {
 899			return false, yaml_parser_set_scanner_error(parser,
 900				"while scanning a simple key", simple_key.mark,
 901				"could not find expected ':'")
 902		}
 903		simple_key.possible = false
 904		return false, true
 905	}
 906	return true, true
 907}
 908
 909// Check if a simple key may start at the current position and add it if
 910// needed.
 911func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
 912	// A simple key is required at the current position if the scanner is in
 913	// the block context and the current column coincides with the indentation
 914	// level.
 915
 916	required := parser.flow_level == 0 && parser.indent == parser.mark.column
 917
 918	//
 919	// If the current position may start a simple key, save it.
 920	//
 921	if parser.simple_key_allowed {
 922		simple_key := yaml_simple_key_t{
 923			possible:     true,
 924			required:     required,
 925			token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
 926			mark:         parser.mark,
 927		}
 928
 929		if !yaml_parser_remove_simple_key(parser) {
 930			return false
 931		}
 932		parser.simple_keys[len(parser.simple_keys)-1] = simple_key
 933		parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1
 934	}
 935	return true
 936}
 937
 938// Remove a potential simple key at the current flow level.
 939func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
 940	i := len(parser.simple_keys) - 1
 941	if parser.simple_keys[i].possible {
 942		// If the key is required, it is an error.
 943		if parser.simple_keys[i].required {
 944			return yaml_parser_set_scanner_error(parser,
 945				"while scanning a simple key", parser.simple_keys[i].mark,
 946				"could not find expected ':'")
 947		}
 948		// Remove the key from the stack.
 949		parser.simple_keys[i].possible = false
 950		delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number)
 951	}
 952	return true
 953}
 954
 955// max_flow_level limits the flow_level
 956const max_flow_level = 10000
 957
 958// Increase the flow level and resize the simple key list if needed.
 959func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
 960	// Reset the simple key on the next level.
 961	parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{
 962		possible:     false,
 963		required:     false,
 964		token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
 965		mark:         parser.mark,
 966	})
 967
 968	// Increase the flow level.
 969	parser.flow_level++
 970	if parser.flow_level > max_flow_level {
 971		return yaml_parser_set_scanner_error(parser,
 972			"while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark,
 973			fmt.Sprintf("exceeded max depth of %d", max_flow_level))
 974	}
 975	return true
 976}
 977
 978// Decrease the flow level.
 979func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
 980	if parser.flow_level > 0 {
 981		parser.flow_level--
 982		last := len(parser.simple_keys) - 1
 983		delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number)
 984		parser.simple_keys = parser.simple_keys[:last]
 985	}
 986	return true
 987}
 988
 989// max_indents limits the indents stack size
 990const max_indents = 10000
 991
 992// Push the current indentation level to the stack and set the new level
 993// the current column is greater than the indentation level.  In this case,
 994// append or insert the specified token into the token queue.
 995func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool {
 996	// In the flow context, do nothing.
 997	if parser.flow_level > 0 {
 998		return true
 999	}
1000
1001	if parser.indent < column {
1002		// Push the current indentation level to the stack and set the new
1003		// indentation level.
1004		parser.indents = append(parser.indents, parser.indent)
1005		parser.indent = column
1006		if len(parser.indents) > max_indents {
1007			return yaml_parser_set_scanner_error(parser,
1008				"while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark,
1009				fmt.Sprintf("exceeded max depth of %d", max_indents))
1010		}
1011
1012		// Create a token and insert it into the queue.
1013		token := yaml_token_t{
1014			typ:        typ,
1015			start_mark: mark,
1016			end_mark:   mark,
1017		}
1018		if number > -1 {
1019			number -= parser.tokens_parsed
1020		}
1021		yaml_insert_token(parser, number, &token)
1022	}
1023	return true
1024}
1025
1026// Pop indentation levels from the indents stack until the current level
1027// becomes less or equal to the column.  For each indentation level, append
1028// the BLOCK-END token.
1029func yaml_parser_unroll_indent(parser *yaml_parser_t, column int, scan_mark yaml_mark_t) bool {
1030	// In the flow context, do nothing.
1031	if parser.flow_level > 0 {
1032		return true
1033	}
1034
1035	block_mark := scan_mark
1036	block_mark.index--
1037
1038	// Loop through the indentation levels in the stack.
1039	for parser.indent > column {
1040
1041		// [Go] Reposition the end token before potential following
1042		//      foot comments of parent blocks. For that, search
1043		//      backwards for recent comments that were at the same
1044		//      indent as the block that is ending now.
1045		stop_index := block_mark.index
1046		for i := len(parser.comments) - 1; i >= 0; i-- {
1047			comment := &parser.comments[i]
1048
1049			if comment.end_mark.index < stop_index {
1050				// Don't go back beyond the start of the comment/whitespace scan, unless column < 0.
1051				// If requested indent column is < 0, then the document is over and everything else
1052				// is a foot anyway.
1053				break
1054			}
1055			if comment.start_mark.column == parser.indent+1 {
1056				// This is a good match. But maybe there's a former comment
1057				// at that same indent level, so keep searching.
1058				block_mark = comment.start_mark
1059			}
1060
1061			// While the end of the former comment matches with
1062			// the start of the following one, we know there's
1063			// nothing in between and scanning is still safe.
1064			stop_index = comment.scan_mark.index
1065		}
1066
1067		// Create a token and append it to the queue.
1068		token := yaml_token_t{
1069			typ:        yaml_BLOCK_END_TOKEN,
1070			start_mark: block_mark,
1071			end_mark:   block_mark,
1072		}
1073		yaml_insert_token(parser, -1, &token)
1074
1075		// Pop the indentation level.
1076		parser.indent = parser.indents[len(parser.indents)-1]
1077		parser.indents = parser.indents[:len(parser.indents)-1]
1078	}
1079	return true
1080}
1081
1082// Initialize the scanner and produce the STREAM-START token.
1083func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool {
1084
1085	// Set the initial indentation.
1086	parser.indent = -1
1087
1088	// Initialize the simple key stack.
1089	parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})
1090
1091	parser.simple_keys_by_tok = make(map[int]int)
1092
1093	// A simple key is allowed at the beginning of the stream.
1094	parser.simple_key_allowed = true
1095
1096	// We have started.
1097	parser.stream_start_produced = true
1098
1099	// Create the STREAM-START token and append it to the queue.
1100	token := yaml_token_t{
1101		typ:        yaml_STREAM_START_TOKEN,
1102		start_mark: parser.mark,
1103		end_mark:   parser.mark,
1104		encoding:   parser.encoding,
1105	}
1106	yaml_insert_token(parser, -1, &token)
1107	return true
1108}
1109
1110// Produce the STREAM-END token and shut down the scanner.
1111func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool {
1112
1113	// Force new line.
1114	if parser.mark.column != 0 {
1115		parser.mark.column = 0
1116		parser.mark.line++
1117	}
1118
1119	// Reset the indentation level.
1120	if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
1121		return false
1122	}
1123
1124	// Reset simple keys.
1125	if !yaml_parser_remove_simple_key(parser) {
1126		return false
1127	}
1128
1129	parser.simple_key_allowed = false
1130
1131	// Create the STREAM-END token and append it to the queue.
1132	token := yaml_token_t{
1133		typ:        yaml_STREAM_END_TOKEN,
1134		start_mark: parser.mark,
1135		end_mark:   parser.mark,
1136	}
1137	yaml_insert_token(parser, -1, &token)
1138	return true
1139}
1140
1141// Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1142func yaml_parser_fetch_directive(parser *yaml_parser_t) bool {
1143	// Reset the indentation level.
1144	if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
1145		return false
1146	}
1147
1148	// Reset simple keys.
1149	if !yaml_parser_remove_simple_key(parser) {
1150		return false
1151	}
1152
1153	parser.simple_key_allowed = false
1154
1155	// Create the YAML-DIRECTIVE or TAG-DIRECTIVE token.
1156	token := yaml_token_t{}
1157	if !yaml_parser_scan_directive(parser, &token) {
1158		return false
1159	}
1160	// Append the token to the queue.
1161	yaml_insert_token(parser, -1, &token)
1162	return true
1163}
1164
1165// Produce the DOCUMENT-START or DOCUMENT-END token.
1166func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool {
1167	// Reset the indentation level.
1168	if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
1169		return false
1170	}
1171
1172	// Reset simple keys.
1173	if !yaml_parser_remove_simple_key(parser) {
1174		return false
1175	}
1176
1177	parser.simple_key_allowed = false
1178
1179	// Consume the token.
1180	start_mark := parser.mark
1181
1182	skip(parser)
1183	skip(parser)
1184	skip(parser)
1185
1186	end_mark := parser.mark
1187
1188	// Create the DOCUMENT-START or DOCUMENT-END token.
1189	token := yaml_token_t{
1190		typ:        typ,
1191		start_mark: start_mark,
1192		end_mark:   end_mark,
1193	}
1194	// Append the token to the queue.
1195	yaml_insert_token(parser, -1, &token)
1196	return true
1197}
1198
1199// Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1200func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool {
1201
1202	// The indicators '[' and '{' may start a simple key.
1203	if !yaml_parser_save_simple_key(parser) {
1204		return false
1205	}
1206
1207	// Increase the flow level.
1208	if !yaml_parser_increase_flow_level(parser) {
1209		return false
1210	}
1211
1212	// A simple key may follow the indicators '[' and '{'.
1213	parser.simple_key_allowed = true
1214
1215	// Consume the token.
1216	start_mark := parser.mark
1217	skip(parser)
1218	end_mark := parser.mark
1219
1220	// Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token.
1221	token := yaml_token_t{
1222		typ:        typ,
1223		start_mark: start_mark,
1224		end_mark:   end_mark,
1225	}
1226	// Append the token to the queue.
1227	yaml_insert_token(parser, -1, &token)
1228	return true
1229}
1230
1231// Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1232func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool {
1233	// Reset any potential simple key on the current flow level.
1234	if !yaml_parser_remove_simple_key(parser) {
1235		return false
1236	}
1237
1238	// Decrease the flow level.
1239	if !yaml_parser_decrease_flow_level(parser) {
1240		return false
1241	}
1242
1243	// No simple keys after the indicators ']' and '}'.
1244	parser.simple_key_allowed = false
1245
1246	// Consume the token.
1247
1248	start_mark := parser.mark
1249	skip(parser)
1250	end_mark := parser.mark
1251
1252	// Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token.
1253	token := yaml_token_t{
1254		typ:        typ,
1255		start_mark: start_mark,
1256		end_mark:   end_mark,
1257	}
1258	// Append the token to the queue.
1259	yaml_insert_token(parser, -1, &token)
1260	return true
1261}
1262
1263// Produce the FLOW-ENTRY token.
1264func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool {
1265	// Reset any potential simple keys on the current flow level.
1266	if !yaml_parser_remove_simple_key(parser) {
1267		return false
1268	}
1269
1270	// Simple keys are allowed after ','.
1271	parser.simple_key_allowed = true
1272
1273	// Consume the token.
1274	start_mark := parser.mark
1275	skip(parser)
1276	end_mark := parser.mark
1277
1278	// Create the FLOW-ENTRY token and append it to the queue.
1279	token := yaml_token_t{
1280		typ:        yaml_FLOW_ENTRY_TOKEN,
1281		start_mark: start_mark,
1282		end_mark:   end_mark,
1283	}
1284	yaml_insert_token(parser, -1, &token)
1285	return true
1286}
1287
1288// Produce the BLOCK-ENTRY token.
1289func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool {
1290	// Check if the scanner is in the block context.
1291	if parser.flow_level == 0 {
1292		// Check if we are allowed to start a new entry.
1293		if !parser.simple_key_allowed {
1294			return yaml_parser_set_scanner_error(parser, "", parser.mark,
1295				"block sequence entries are not allowed in this context")
1296		}
1297		// Add the BLOCK-SEQUENCE-START token if needed.
1298		if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) {
1299			return false
1300		}
1301	} else {
1302		// It is an error for the '-' indicator to occur in the flow context,
1303		// but we let the Parser detect and report about it because the Parser
1304		// is able to point to the context.
1305	}
1306
1307	// Reset any potential simple keys on the current flow level.
1308	if !yaml_parser_remove_simple_key(parser) {
1309		return false
1310	}
1311
1312	// Simple keys are allowed after '-'.
1313	parser.simple_key_allowed = true
1314
1315	// Consume the token.
1316	start_mark := parser.mark
1317	skip(parser)
1318	end_mark := parser.mark
1319
1320	// Create the BLOCK-ENTRY token and append it to the queue.
1321	token := yaml_token_t{
1322		typ:        yaml_BLOCK_ENTRY_TOKEN,
1323		start_mark: start_mark,
1324		end_mark:   end_mark,
1325	}
1326	yaml_insert_token(parser, -1, &token)
1327	return true
1328}
1329
1330// Produce the KEY token.
1331func yaml_parser_fetch_key(parser *yaml_parser_t) bool {
1332
1333	// In the block context, additional checks are required.
1334	if parser.flow_level == 0 {
1335		// Check if we are allowed to start a new key (not nessesary simple).
1336		if !parser.simple_key_allowed {
1337			return yaml_parser_set_scanner_error(parser, "", parser.mark,
1338				"mapping keys are not allowed in this context")
1339		}
1340		// Add the BLOCK-MAPPING-START token if needed.
1341		if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {
1342			return false
1343		}
1344	}
1345
1346	// Reset any potential simple keys on the current flow level.
1347	if !yaml_parser_remove_simple_key(parser) {
1348		return false
1349	}
1350
1351	// Simple keys are allowed after '?' in the block context.
1352	parser.simple_key_allowed = parser.flow_level == 0
1353
1354	// Consume the token.
1355	start_mark := parser.mark
1356	skip(parser)
1357	end_mark := parser.mark
1358
1359	// Create the KEY token and append it to the queue.
1360	token := yaml_token_t{
1361		typ:        yaml_KEY_TOKEN,
1362		start_mark: start_mark,
1363		end_mark:   end_mark,
1364	}
1365	yaml_insert_token(parser, -1, &token)
1366	return true
1367}
1368
1369// Produce the VALUE token.
1370func yaml_parser_fetch_value(parser *yaml_parser_t) bool {
1371
1372	simple_key := &parser.simple_keys[len(parser.simple_keys)-1]
1373
1374	// Have we found a simple key?
1375	if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok {
1376		return false
1377
1378	} else if valid {
1379
1380		// Create the KEY token and insert it into the queue.
1381		token := yaml_token_t{
1382			typ:        yaml_KEY_TOKEN,
1383			start_mark: simple_key.mark,
1384			end_mark:   simple_key.mark,
1385		}
1386		yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token)
1387
1388		// In the block context, we may need to add the BLOCK-MAPPING-START token.
1389		if !yaml_parser_roll_indent(parser, simple_key.mark.column,
1390			simple_key.token_number,
1391			yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) {
1392			return false
1393		}
1394
1395		// Remove the simple key.
1396		simple_key.possible = false
1397		delete(parser.simple_keys_by_tok, simple_key.token_number)
1398
1399		// A simple key cannot follow another simple key.
1400		parser.simple_key_allowed = false
1401
1402	} else {
1403		// The ':' indicator follows a complex key.
1404
1405		// In the block context, extra checks are required.
1406		if parser.flow_level == 0 {
1407
1408			// Check if we are allowed to start a complex value.
1409			if !parser.simple_key_allowed {
1410				return yaml_parser_set_scanner_error(parser, "", parser.mark,
1411					"mapping values are not allowed in this context")
1412			}
1413
1414			// Add the BLOCK-MAPPING-START token if needed.
1415			if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {
1416				return false
1417			}
1418		}
1419
1420		// Simple keys after ':' are allowed in the block context.
1421		parser.simple_key_allowed = parser.flow_level == 0
1422	}
1423
1424	// Consume the token.
1425	start_mark := parser.mark
1426	skip(parser)
1427	end_mark := parser.mark
1428
1429	// Create the VALUE token and append it to the queue.
1430	token := yaml_token_t{
1431		typ:        yaml_VALUE_TOKEN,
1432		start_mark: start_mark,
1433		end_mark:   end_mark,
1434	}
1435	yaml_insert_token(parser, -1, &token)
1436	return true
1437}
1438
1439// Produce the ALIAS or ANCHOR token.
1440func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool {
1441	// An anchor or an alias could be a simple key.
1442	if !yaml_parser_save_simple_key(parser) {
1443		return false
1444	}
1445
1446	// A simple key cannot follow an anchor or an alias.
1447	parser.simple_key_allowed = false
1448
1449	// Create the ALIAS or ANCHOR token and append it to the queue.
1450	var token yaml_token_t
1451	if !yaml_parser_scan_anchor(parser, &token, typ) {
1452		return false
1453	}
1454	yaml_insert_token(parser, -1, &token)
1455	return true
1456}
1457
1458// Produce the TAG token.
1459func yaml_parser_fetch_tag(parser *yaml_parser_t) bool {
1460	// A tag could be a simple key.
1461	if !yaml_parser_save_simple_key(parser) {
1462		return false
1463	}
1464
1465	// A simple key cannot follow a tag.
1466	parser.simple_key_allowed = false
1467
1468	// Create the TAG token and append it to the queue.
1469	var token yaml_token_t
1470	if !yaml_parser_scan_tag(parser, &token) {
1471		return false
1472	}
1473	yaml_insert_token(parser, -1, &token)
1474	return true
1475}
1476
1477// Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1478func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool {
1479	// Remove any potential simple keys.
1480	if !yaml_parser_remove_simple_key(parser) {
1481		return false
1482	}
1483
1484	// A simple key may follow a block scalar.
1485	parser.simple_key_allowed = true
1486
1487	// Create the SCALAR token and append it to the queue.
1488	var token yaml_token_t
1489	if !yaml_parser_scan_block_scalar(parser, &token, literal) {
1490		return false
1491	}
1492	yaml_insert_token(parser, -1, &token)
1493	return true
1494}
1495
1496// Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1497func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool {
1498	// A plain scalar could be a simple key.
1499	if !yaml_parser_save_simple_key(parser) {
1500		return false
1501	}
1502
1503	// A simple key cannot follow a flow scalar.
1504	parser.simple_key_allowed = false
1505
1506	// Create the SCALAR token and append it to the queue.
1507	var token yaml_token_t
1508	if !yaml_parser_scan_flow_scalar(parser, &token, single) {
1509		return false
1510	}
1511	yaml_insert_token(parser, -1, &token)
1512	return true
1513}
1514
1515// Produce the SCALAR(...,plain) token.
1516func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool {
1517	// A plain scalar could be a simple key.
1518	if !yaml_parser_save_simple_key(parser) {
1519		return false
1520	}
1521
1522	// A simple key cannot follow a flow scalar.
1523	parser.simple_key_allowed = false
1524
1525	// Create the SCALAR token and append it to the queue.
1526	var token yaml_token_t
1527	if !yaml_parser_scan_plain_scalar(parser, &token) {
1528		return false
1529	}
1530	yaml_insert_token(parser, -1, &token)
1531	return true
1532}
1533
1534// Eat whitespaces and comments until the next token is found.
1535func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool {
1536
1537	scan_mark := parser.mark
1538
1539	// Until the next token is not found.
1540	for {
1541		// Allow the BOM mark to start a line.
1542		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1543			return false
1544		}
1545		if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) {
1546			skip(parser)
1547		}
1548
1549		// Eat whitespaces.
1550		// Tabs are allowed:
1551		//  - in the flow context
1552		//  - in the block context, but not at the beginning of the line or
1553		//  after '-', '?', or ':' (complex value).
1554		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1555			return false
1556		}
1557
1558		for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') {
1559			skip(parser)
1560			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1561				return false
1562			}
1563		}
1564
1565		// Check if we just had a line comment under a sequence entry that
1566		// looks more like a header to the following content. Similar to this:
1567		//
1568		// - # The comment
1569		//   - Some data
1570		//
1571		// If so, transform the line comment to a head comment and reposition.
1572		if len(parser.comments) > 0 && len(parser.tokens) > 1 {
1573			tokenA := parser.tokens[len(parser.tokens)-2]
1574			tokenB := parser.tokens[len(parser.tokens)-1]
1575			comment := &parser.comments[len(parser.comments)-1]
1576			if tokenA.typ == yaml_BLOCK_SEQUENCE_START_TOKEN && tokenB.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) {
1577				// If it was in the prior line, reposition so it becomes a
1578				// header of the follow up token. Otherwise, keep it in place
1579				// so it becomes a header of the former.
1580				comment.head = comment.line
1581				comment.line = nil
1582				if comment.start_mark.line == parser.mark.line-1 {
1583					comment.token_mark = parser.mark
1584				}
1585			}
1586		}
1587
1588		// Eat a comment until a line break.
1589		if parser.buffer[parser.buffer_pos] == '#' {
1590			if !yaml_parser_scan_comments(parser, scan_mark) {
1591				return false
1592			}
1593		}
1594
1595		// If it is a line break, eat it.
1596		if is_break(parser.buffer, parser.buffer_pos) {
1597			if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
1598				return false
1599			}
1600			skip_line(parser)
1601
1602			// In the block context, a new line may start a simple key.
1603			if parser.flow_level == 0 {
1604				parser.simple_key_allowed = true
1605			}
1606		} else {
1607			break // We have found a token.
1608		}
1609	}
1610
1611	return true
1612}
1613
1614// Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
1615//
1616// Scope:
1617//
1618//	%YAML    1.1    # a comment \n
1619//	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1620//	%TAG    !yaml!  tag:yaml.org,2002:  \n
1621//	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1622func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool {
1623	// Eat '%'.
1624	start_mark := parser.mark
1625	skip(parser)
1626
1627	// Scan the directive name.
1628	var name []byte
1629	if !yaml_parser_scan_directive_name(parser, start_mark, &name) {
1630		return false
1631	}
1632
1633	// Is it a YAML directive?
1634	if bytes.Equal(name, []byte("YAML")) {
1635		// Scan the VERSION directive value.
1636		var major, minor int8
1637		if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) {
1638			return false
1639		}
1640		end_mark := parser.mark
1641
1642		// Create a VERSION-DIRECTIVE token.
1643		*token = yaml_token_t{
1644			typ:        yaml_VERSION_DIRECTIVE_TOKEN,
1645			start_mark: start_mark,
1646			end_mark:   end_mark,
1647			major:      major,
1648			minor:      minor,
1649		}
1650
1651		// Is it a TAG directive?
1652	} else if bytes.Equal(name, []byte("TAG")) {
1653		// Scan the TAG directive value.
1654		var handle, prefix []byte
1655		if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) {
1656			return false
1657		}
1658		end_mark := parser.mark
1659
1660		// Create a TAG-DIRECTIVE token.
1661		*token = yaml_token_t{
1662			typ:        yaml_TAG_DIRECTIVE_TOKEN,
1663			start_mark: start_mark,
1664			end_mark:   end_mark,
1665			value:      handle,
1666			prefix:     prefix,
1667		}
1668
1669		// Unknown directive.
1670	} else {
1671		yaml_parser_set_scanner_error(parser, "while scanning a directive",
1672			start_mark, "found unknown directive name")
1673		return false
1674	}
1675
1676	// Eat the rest of the line including any comments.
1677	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1678		return false
1679	}
1680
1681	for is_blank(parser.buffer, parser.buffer_pos) {
1682		skip(parser)
1683		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1684			return false
1685		}
1686	}
1687
1688	if parser.buffer[parser.buffer_pos] == '#' {
1689		// [Go] Discard this inline comment for the time being.
1690		//if !yaml_parser_scan_line_comment(parser, start_mark) {
1691		//	return false
1692		//}
1693		for !is_breakz(parser.buffer, parser.buffer_pos) {
1694			skip(parser)
1695			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1696				return false
1697			}
1698		}
1699	}
1700
1701	// Check if we are at the end of the line.
1702	if !is_breakz(parser.buffer, parser.buffer_pos) {
1703		yaml_parser_set_scanner_error(parser, "while scanning a directive",
1704			start_mark, "did not find expected comment or line break")
1705		return false
1706	}
1707
1708	// Eat a line break.
1709	if is_break(parser.buffer, parser.buffer_pos) {
1710		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
1711			return false
1712		}
1713		skip_line(parser)
1714	}
1715
1716	return true
1717}
1718
1719// Scan the directive name.
1720//
1721// Scope:
1722//
1723//	%YAML   1.1     # a comment \n
1724//	 ^^^^
1725//	%TAG    !yaml!  tag:yaml.org,2002:  \n
1726//	 ^^^
1727func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool {
1728	// Consume the directive name.
1729	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1730		return false
1731	}
1732
1733	var s []byte
1734	for is_alpha(parser.buffer, parser.buffer_pos) {
1735		s = read(parser, s)
1736		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1737			return false
1738		}
1739	}
1740
1741	// Check if the name is empty.
1742	if len(s) == 0 {
1743		yaml_parser_set_scanner_error(parser, "while scanning a directive",
1744			start_mark, "could not find expected directive name")
1745		return false
1746	}
1747
1748	// Check for an blank character after the name.
1749	if !is_blankz(parser.buffer, parser.buffer_pos) {
1750		yaml_parser_set_scanner_error(parser, "while scanning a directive",
1751			start_mark, "found unexpected non-alphabetical character")
1752		return false
1753	}
1754	*name = s
1755	return true
1756}
1757
1758// Scan the value of VERSION-DIRECTIVE.
1759//
1760// Scope:
1761//
1762//	%YAML   1.1     # a comment \n
1763//	     ^^^^^^
1764func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool {
1765	// Eat whitespaces.
1766	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1767		return false
1768	}
1769	for is_blank(parser.buffer, parser.buffer_pos) {
1770		skip(parser)
1771		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1772			return false
1773		}
1774	}
1775
1776	// Consume the major version number.
1777	if !yaml_parser_scan_version_directive_number(parser, start_mark, major) {
1778		return false
1779	}
1780
1781	// Eat '.'.
1782	if parser.buffer[parser.buffer_pos] != '.' {
1783		return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
1784			start_mark, "did not find expected digit or '.' character")
1785	}
1786
1787	skip(parser)
1788
1789	// Consume the minor version number.
1790	if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) {
1791		return false
1792	}
1793	return true
1794}
1795
1796const max_number_length = 2
1797
1798// Scan the version number of VERSION-DIRECTIVE.
1799//
1800// Scope:
1801//
1802//	%YAML   1.1     # a comment \n
1803//	        ^
1804//	%YAML   1.1     # a comment \n
1805//	          ^
1806func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool {
1807
1808	// Repeat while the next character is digit.
1809	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1810		return false
1811	}
1812	var value, length int8
1813	for is_digit(parser.buffer, parser.buffer_pos) {
1814		// Check if the number is too long.
1815		length++
1816		if length > max_number_length {
1817			return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
1818				start_mark, "found extremely long version number")
1819		}
1820		value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos))
1821		skip(parser)
1822		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1823			return false
1824		}
1825	}
1826
1827	// Check if the number was present.
1828	if length == 0 {
1829		return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
1830			start_mark, "did not find expected version number")
1831	}
1832	*number = value
1833	return true
1834}
1835
1836// Scan the value of a TAG-DIRECTIVE token.
1837//
1838// Scope:
1839//
1840//	%TAG    !yaml!  tag:yaml.org,2002:  \n
1841//	    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1842func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool {
1843	var handle_value, prefix_value []byte
1844
1845	// Eat whitespaces.
1846	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1847		return false
1848	}
1849
1850	for is_blank(parser.buffer, parser.buffer_pos) {
1851		skip(parser)
1852		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1853			return false
1854		}
1855	}
1856
1857	// Scan a handle.
1858	if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) {
1859		return false
1860	}
1861
1862	// Expect a whitespace.
1863	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1864		return false
1865	}
1866	if !is_blank(parser.buffer, parser.buffer_pos) {
1867		yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
1868			start_mark, "did not find expected whitespace")
1869		return false
1870	}
1871
1872	// Eat whitespaces.
1873	for is_blank(parser.buffer, parser.buffer_pos) {
1874		skip(parser)
1875		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1876			return false
1877		}
1878	}
1879
1880	// Scan a prefix.
1881	if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) {
1882		return false
1883	}
1884
1885	// Expect a whitespace or line break.
1886	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1887		return false
1888	}
1889	if !is_blankz(parser.buffer, parser.buffer_pos) {
1890		yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
1891			start_mark, "did not find expected whitespace or line break")
1892		return false
1893	}
1894
1895	*handle = handle_value
1896	*prefix = prefix_value
1897	return true
1898}
1899
1900func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool {
1901	var s []byte
1902
1903	// Eat the indicator character.
1904	start_mark := parser.mark
1905	skip(parser)
1906
1907	// Consume the value.
1908	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1909		return false
1910	}
1911
1912	for is_alpha(parser.buffer, parser.buffer_pos) {
1913		s = read(parser, s)
1914		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
1915			return false
1916		}
1917	}
1918
1919	end_mark := parser.mark
1920
1921	/*
1922	 * Check if length of the anchor is greater than 0 and it is followed by
1923	 * a whitespace character or one of the indicators:
1924	 *
1925	 *      '?', ':', ',', ']', '}', '%', '@', '`'.
1926	 */
1927
1928	if len(s) == 0 ||
1929		!(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' ||
1930			parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' ||
1931			parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' ||
1932			parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' ||
1933			parser.buffer[parser.buffer_pos] == '`') {
1934		context := "while scanning an alias"
1935		if typ == yaml_ANCHOR_TOKEN {
1936			context = "while scanning an anchor"
1937		}
1938		yaml_parser_set_scanner_error(parser, context, start_mark,
1939			"did not find expected alphabetic or numeric character")
1940		return false
1941	}
1942
1943	// Create a token.
1944	*token = yaml_token_t{
1945		typ:        typ,
1946		start_mark: start_mark,
1947		end_mark:   end_mark,
1948		value:      s,
1949	}
1950
1951	return true
1952}
1953
1954/*
1955 * Scan a TAG token.
1956 */
1957
1958func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool {
1959	var handle, suffix []byte
1960
1961	start_mark := parser.mark
1962
1963	// Check if the tag is in the canonical form.
1964	if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
1965		return false
1966	}
1967
1968	if parser.buffer[parser.buffer_pos+1] == '<' {
1969		// Keep the handle as ''
1970
1971		// Eat '!<'
1972		skip(parser)
1973		skip(parser)
1974
1975		// Consume the tag value.
1976		if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {
1977			return false
1978		}
1979
1980		// Check for '>' and eat it.
1981		if parser.buffer[parser.buffer_pos] != '>' {
1982			yaml_parser_set_scanner_error(parser, "while scanning a tag",
1983				start_mark, "did not find the expected '>'")
1984			return false
1985		}
1986
1987		skip(parser)
1988	} else {
1989		// The tag has either the '!suffix' or the '!handle!suffix' form.
1990
1991		// First, try to scan a handle.
1992		if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) {
1993			return false
1994		}
1995
1996		// Check if it is, indeed, handle.
1997		if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' {
1998			// Scan the suffix now.
1999			if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {
2000				return false
2001			}
2002		} else {
2003			// It wasn't a handle after all.  Scan the rest of the tag.
2004			if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) {
2005				return false
2006			}
2007
2008			// Set the handle to '!'.
2009			handle = []byte{'!'}
2010
2011			// A special case: the '!' tag.  Set the handle to '' and the
2012			// suffix to '!'.
2013			if len(suffix) == 0 {
2014				handle, suffix = suffix, handle
2015			}
2016		}
2017	}
2018
2019	// Check the character which ends the tag.
2020	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2021		return false
2022	}
2023	if !is_blankz(parser.buffer, parser.buffer_pos) {
2024		yaml_parser_set_scanner_error(parser, "while scanning a tag",
2025			start_mark, "did not find expected whitespace or line break")
2026		return false
2027	}
2028
2029	end_mark := parser.mark
2030
2031	// Create a token.
2032	*token = yaml_token_t{
2033		typ:        yaml_TAG_TOKEN,
2034		start_mark: start_mark,
2035		end_mark:   end_mark,
2036		value:      handle,
2037		suffix:     suffix,
2038	}
2039	return true
2040}
2041
2042// Scan a tag handle.
2043func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool {
2044	// Check the initial '!' character.
2045	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2046		return false
2047	}
2048	if parser.buffer[parser.buffer_pos] != '!' {
2049		yaml_parser_set_scanner_tag_error(parser, directive,
2050			start_mark, "did not find expected '!'")
2051		return false
2052	}
2053
2054	var s []byte
2055
2056	// Copy the '!' character.
2057	s = read(parser, s)
2058
2059	// Copy all subsequent alphabetical and numerical characters.
2060	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2061		return false
2062	}
2063	for is_alpha(parser.buffer, parser.buffer_pos) {
2064		s = read(parser, s)
2065		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2066			return false
2067		}
2068	}
2069
2070	// Check if the trailing character is '!' and copy it.
2071	if parser.buffer[parser.buffer_pos] == '!' {
2072		s = read(parser, s)
2073	} else {
2074		// It's either the '!' tag or not really a tag handle.  If it's a %TAG
2075		// directive, it's an error.  If it's a tag token, it must be a part of URI.
2076		if directive && string(s) != "!" {
2077			yaml_parser_set_scanner_tag_error(parser, directive,
2078				start_mark, "did not find expected '!'")
2079			return false
2080		}
2081	}
2082
2083	*handle = s
2084	return true
2085}
2086
2087// Scan a tag.
2088func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool {
2089	//size_t length = head ? strlen((char *)head) : 0
2090	var s []byte
2091	hasTag := len(head) > 0
2092
2093	// Copy the head if needed.
2094	//
2095	// Note that we don't copy the leading '!' character.
2096	if len(head) > 1 {
2097		s = append(s, head[1:]...)
2098	}
2099
2100	// Scan the tag.
2101	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2102		return false
2103	}
2104
2105	// The set of characters that may appear in URI is as follows:
2106	//
2107	//      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2108	//      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
2109	//      '%'.
2110	// [Go] TODO Convert this into more reasonable logic.
2111	for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' ||
2112		parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' ||
2113		parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' ||
2114		parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' ||
2115		parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' ||
2116		parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' ||
2117		parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' ||
2118		parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' ||
2119		parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' ||
2120		parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' ||
2121		parser.buffer[parser.buffer_pos] == '%' {
2122		// Check if it is a URI-escape sequence.
2123		if parser.buffer[parser.buffer_pos] == '%' {
2124			if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) {
2125				return false
2126			}
2127		} else {
2128			s = read(parser, s)
2129		}
2130		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2131			return false
2132		}
2133		hasTag = true
2134	}
2135
2136	if !hasTag {
2137		yaml_parser_set_scanner_tag_error(parser, directive,
2138			start_mark, "did not find expected tag URI")
2139		return false
2140	}
2141	*uri = s
2142	return true
2143}
2144
2145// Decode an URI-escape sequence corresponding to a single UTF-8 character.
2146func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool {
2147
2148	// Decode the required number of characters.
2149	w := 1024
2150	for w > 0 {
2151		// Check for a URI-escaped octet.
2152		if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
2153			return false
2154		}
2155
2156		if !(parser.buffer[parser.buffer_pos] == '%' &&
2157			is_hex(parser.buffer, parser.buffer_pos+1) &&
2158			is_hex(parser.buffer, parser.buffer_pos+2)) {
2159			return yaml_parser_set_scanner_tag_error(parser, directive,
2160				start_mark, "did not find URI escaped octet")
2161		}
2162
2163		// Get the octet.
2164		octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2))
2165
2166		// If it is the leading octet, determine the length of the UTF-8 sequence.
2167		if w == 1024 {
2168			w = width(octet)
2169			if w == 0 {
2170				return yaml_parser_set_scanner_tag_error(parser, directive,
2171					start_mark, "found an incorrect leading UTF-8 octet")
2172			}
2173		} else {
2174			// Check if the trailing octet is correct.
2175			if octet&0xC0 != 0x80 {
2176				return yaml_parser_set_scanner_tag_error(parser, directive,
2177					start_mark, "found an incorrect trailing UTF-8 octet")
2178			}
2179		}
2180
2181		// Copy the octet and move the pointers.
2182		*s = append(*s, octet)
2183		skip(parser)
2184		skip(parser)
2185		skip(parser)
2186		w--
2187	}
2188	return true
2189}
2190
2191// Scan a block scalar.
2192func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool {
2193	// Eat the indicator '|' or '>'.
2194	start_mark := parser.mark
2195	skip(parser)
2196
2197	// Scan the additional block scalar indicators.
2198	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2199		return false
2200	}
2201
2202	// Check for a chomping indicator.
2203	var chomping, increment int
2204	if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
2205		// Set the chomping method and eat the indicator.
2206		if parser.buffer[parser.buffer_pos] == '+' {
2207			chomping = +1
2208		} else {
2209			chomping = -1
2210		}
2211		skip(parser)
2212
2213		// Check for an indentation indicator.
2214		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2215			return false
2216		}
2217		if is_digit(parser.buffer, parser.buffer_pos) {
2218			// Check that the indentation is greater than 0.
2219			if parser.buffer[parser.buffer_pos] == '0' {
2220				yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2221					start_mark, "found an indentation indicator equal to 0")
2222				return false
2223			}
2224
2225			// Get the indentation level and eat the indicator.
2226			increment = as_digit(parser.buffer, parser.buffer_pos)
2227			skip(parser)
2228		}
2229
2230	} else if is_digit(parser.buffer, parser.buffer_pos) {
2231		// Do the same as above, but in the opposite order.
2232
2233		if parser.buffer[parser.buffer_pos] == '0' {
2234			yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2235				start_mark, "found an indentation indicator equal to 0")
2236			return false
2237		}
2238		increment = as_digit(parser.buffer, parser.buffer_pos)
2239		skip(parser)
2240
2241		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2242			return false
2243		}
2244		if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
2245			if parser.buffer[parser.buffer_pos] == '+' {
2246				chomping = +1
2247			} else {
2248				chomping = -1
2249			}
2250			skip(parser)
2251		}
2252	}
2253
2254	// Eat whitespaces and comments to the end of the line.
2255	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2256		return false
2257	}
2258	for is_blank(parser.buffer, parser.buffer_pos) {
2259		skip(parser)
2260		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2261			return false
2262		}
2263	}
2264	if parser.buffer[parser.buffer_pos] == '#' {
2265		if !yaml_parser_scan_line_comment(parser, start_mark) {
2266			return false
2267		}
2268		for !is_breakz(parser.buffer, parser.buffer_pos) {
2269			skip(parser)
2270			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2271				return false
2272			}
2273		}
2274	}
2275
2276	// Check if we are at the end of the line.
2277	if !is_breakz(parser.buffer, parser.buffer_pos) {
2278		yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2279			start_mark, "did not find expected comment or line break")
2280		return false
2281	}
2282
2283	// Eat a line break.
2284	if is_break(parser.buffer, parser.buffer_pos) {
2285		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2286			return false
2287		}
2288		skip_line(parser)
2289	}
2290
2291	end_mark := parser.mark
2292
2293	// Set the indentation level if it was specified.
2294	var indent int
2295	if increment > 0 {
2296		if parser.indent >= 0 {
2297			indent = parser.indent + increment
2298		} else {
2299			indent = increment
2300		}
2301	}
2302
2303	// Scan the leading line breaks and determine the indentation level if needed.
2304	var s, leading_break, trailing_breaks []byte
2305	if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
2306		return false
2307	}
2308
2309	// Scan the block scalar content.
2310	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2311		return false
2312	}
2313	var leading_blank, trailing_blank bool
2314	for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) {
2315		// We are at the beginning of a non-empty line.
2316
2317		// Is it a trailing whitespace?
2318		trailing_blank = is_blank(parser.buffer, parser.buffer_pos)
2319
2320		// Check if we need to fold the leading line break.
2321		if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' {
2322			// Do we need to join the lines by space?
2323			if len(trailing_breaks) == 0 {
2324				s = append(s, ' ')
2325			}
2326		} else {
2327			s = append(s, leading_break...)
2328		}
2329		leading_break = leading_break[:0]
2330
2331		// Append the remaining line breaks.
2332		s = append(s, trailing_breaks...)
2333		trailing_breaks = trailing_breaks[:0]
2334
2335		// Is it a leading whitespace?
2336		leading_blank = is_blank(parser.buffer, parser.buffer_pos)
2337
2338		// Consume the current line.
2339		for !is_breakz(parser.buffer, parser.buffer_pos) {
2340			s = read(parser, s)
2341			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2342				return false
2343			}
2344		}
2345
2346		// Consume the line break.
2347		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2348			return false
2349		}
2350
2351		leading_break = read_line(parser, leading_break)
2352
2353		// Eat the following indentation spaces and line breaks.
2354		if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
2355			return false
2356		}
2357	}
2358
2359	// Chomp the tail.
2360	if chomping != -1 {
2361		s = append(s, leading_break...)
2362	}
2363	if chomping == 1 {
2364		s = append(s, trailing_breaks...)
2365	}
2366
2367	// Create a token.
2368	*token = yaml_token_t{
2369		typ:        yaml_SCALAR_TOKEN,
2370		start_mark: start_mark,
2371		end_mark:   end_mark,
2372		value:      s,
2373		style:      yaml_LITERAL_SCALAR_STYLE,
2374	}
2375	if !literal {
2376		token.style = yaml_FOLDED_SCALAR_STYLE
2377	}
2378	return true
2379}
2380
2381// Scan indentation spaces and line breaks for a block scalar.  Determine the
2382// indentation level if needed.
2383func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool {
2384	*end_mark = parser.mark
2385
2386	// Eat the indentation spaces and line breaks.
2387	max_indent := 0
2388	for {
2389		// Eat the indentation spaces.
2390		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2391			return false
2392		}
2393		for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) {
2394			skip(parser)
2395			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2396				return false
2397			}
2398		}
2399		if parser.mark.column > max_indent {
2400			max_indent = parser.mark.column
2401		}
2402
2403		// Check for a tab character messing the indentation.
2404		if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) {
2405			return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2406				start_mark, "found a tab character where an indentation space is expected")
2407		}
2408
2409		// Have we found a non-empty line?
2410		if !is_break(parser.buffer, parser.buffer_pos) {
2411			break
2412		}
2413
2414		// Consume the line break.
2415		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2416			return false
2417		}
2418		// [Go] Should really be returning breaks instead.
2419		*breaks = read_line(parser, *breaks)
2420		*end_mark = parser.mark
2421	}
2422
2423	// Determine the indentation level if needed.
2424	if *indent == 0 {
2425		*indent = max_indent
2426		if *indent < parser.indent+1 {
2427			*indent = parser.indent + 1
2428		}
2429		if *indent < 1 {
2430			*indent = 1
2431		}
2432	}
2433	return true
2434}
2435
2436// Scan a quoted scalar.
2437func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool {
2438	// Eat the left quote.
2439	start_mark := parser.mark
2440	skip(parser)
2441
2442	// Consume the content of the quoted scalar.
2443	var s, leading_break, trailing_breaks, whitespaces []byte
2444	for {
2445		// Check that there are no document indicators at the beginning of the line.
2446		if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
2447			return false
2448		}
2449
2450		if parser.mark.column == 0 &&
2451			((parser.buffer[parser.buffer_pos+0] == '-' &&
2452				parser.buffer[parser.buffer_pos+1] == '-' &&
2453				parser.buffer[parser.buffer_pos+2] == '-') ||
2454				(parser.buffer[parser.buffer_pos+0] == '.' &&
2455					parser.buffer[parser.buffer_pos+1] == '.' &&
2456					parser.buffer[parser.buffer_pos+2] == '.')) &&
2457			is_blankz(parser.buffer, parser.buffer_pos+3) {
2458			yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
2459				start_mark, "found unexpected document indicator")
2460			return false
2461		}
2462
2463		// Check for EOF.
2464		if is_z(parser.buffer, parser.buffer_pos) {
2465			yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
2466				start_mark, "found unexpected end of stream")
2467			return false
2468		}
2469
2470		// Consume non-blank characters.
2471		leading_blanks := false
2472		for !is_blankz(parser.buffer, parser.buffer_pos) {
2473			if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' {
2474				// Is is an escaped single quote.
2475				s = append(s, '\'')
2476				skip(parser)
2477				skip(parser)
2478
2479			} else if single && parser.buffer[parser.buffer_pos] == '\'' {
2480				// It is a right single quote.
2481				break
2482			} else if !single && parser.buffer[parser.buffer_pos] == '"' {
2483				// It is a right double quote.
2484				break
2485
2486			} else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) {
2487				// It is an escaped line break.
2488				if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
2489					return false
2490				}
2491				skip(parser)
2492				skip_line(parser)
2493				leading_blanks = true
2494				break
2495
2496			} else if !single && parser.buffer[parser.buffer_pos] == '\\' {
2497				// It is an escape sequence.
2498				code_length := 0
2499
2500				// Check the escape character.
2501				switch parser.buffer[parser.buffer_pos+1] {
2502				case '0':
2503					s = append(s, 0)
2504				case 'a':
2505					s = append(s, '\x07')
2506				case 'b':
2507					s = append(s, '\x08')
2508				case 't', '\t':
2509					s = append(s, '\x09')
2510				case 'n':
2511					s = append(s, '\x0A')
2512				case 'v':
2513					s = append(s, '\x0B')
2514				case 'f':
2515					s = append(s, '\x0C')
2516				case 'r':
2517					s = append(s, '\x0D')
2518				case 'e':
2519					s = append(s, '\x1B')
2520				case ' ':
2521					s = append(s, '\x20')
2522				case '"':
2523					s = append(s, '"')
2524				case '\'':
2525					s = append(s, '\'')
2526				case '\\':
2527					s = append(s, '\\')
2528				case 'N': // NEL (#x85)
2529					s = append(s, '\xC2')
2530					s = append(s, '\x85')
2531				case '_': // #xA0
2532					s = append(s, '\xC2')
2533					s = append(s, '\xA0')
2534				case 'L': // LS (#x2028)
2535					s = append(s, '\xE2')
2536					s = append(s, '\x80')
2537					s = append(s, '\xA8')
2538				case 'P': // PS (#x2029)
2539					s = append(s, '\xE2')
2540					s = append(s, '\x80')
2541					s = append(s, '\xA9')
2542				case 'x':
2543					code_length = 2
2544				case 'u':
2545					code_length = 4
2546				case 'U':
2547					code_length = 8
2548				default:
2549					yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
2550						start_mark, "found unknown escape character")
2551					return false
2552				}
2553
2554				skip(parser)
2555				skip(parser)
2556
2557				// Consume an arbitrary escape code.
2558				if code_length > 0 {
2559					var value int
2560
2561					// Scan the character value.
2562					if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) {
2563						return false
2564					}
2565					for k := 0; k < code_length; k++ {
2566						if !is_hex(parser.buffer, parser.buffer_pos+k) {
2567							yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
2568								start_mark, "did not find expected hexdecimal number")
2569							return false
2570						}
2571						value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k)
2572					}
2573
2574					// Check the value and write the character.
2575					if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF {
2576						yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
2577							start_mark, "found invalid Unicode character escape code")
2578						return false
2579					}
2580					if value <= 0x7F {
2581						s = append(s, byte(value))
2582					} else if value <= 0x7FF {
2583						s = append(s, byte(0xC0+(value>>6)))
2584						s = append(s, byte(0x80+(value&0x3F)))
2585					} else if value <= 0xFFFF {
2586						s = append(s, byte(0xE0+(value>>12)))
2587						s = append(s, byte(0x80+((value>>6)&0x3F)))
2588						s = append(s, byte(0x80+(value&0x3F)))
2589					} else {
2590						s = append(s, byte(0xF0+(value>>18)))
2591						s = append(s, byte(0x80+((value>>12)&0x3F)))
2592						s = append(s, byte(0x80+((value>>6)&0x3F)))
2593						s = append(s, byte(0x80+(value&0x3F)))
2594					}
2595
2596					// Advance the pointer.
2597					for k := 0; k < code_length; k++ {
2598						skip(parser)
2599					}
2600				}
2601			} else {
2602				// It is a non-escaped non-blank character.
2603				s = read(parser, s)
2604			}
2605			if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2606				return false
2607			}
2608		}
2609
2610		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2611			return false
2612		}
2613
2614		// Check if we are at the end of the scalar.
2615		if single {
2616			if parser.buffer[parser.buffer_pos] == '\'' {
2617				break
2618			}
2619		} else {
2620			if parser.buffer[parser.buffer_pos] == '"' {
2621				break
2622			}
2623		}
2624
2625		// Consume blank characters.
2626		for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
2627			if is_blank(parser.buffer, parser.buffer_pos) {
2628				// Consume a space or a tab character.
2629				if !leading_blanks {
2630					whitespaces = read(parser, whitespaces)
2631				} else {
2632					skip(parser)
2633				}
2634			} else {
2635				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2636					return false
2637				}
2638
2639				// Check if it is a first line break.
2640				if !leading_blanks {
2641					whitespaces = whitespaces[:0]
2642					leading_break = read_line(parser, leading_break)
2643					leading_blanks = true
2644				} else {
2645					trailing_breaks = read_line(parser, trailing_breaks)
2646				}
2647			}
2648			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2649				return false
2650			}
2651		}
2652
2653		// Join the whitespaces or fold line breaks.
2654		if leading_blanks {
2655			// Do we need to fold line breaks?
2656			if len(leading_break) > 0 && leading_break[0] == '\n' {
2657				if len(trailing_breaks) == 0 {
2658					s = append(s, ' ')
2659				} else {
2660					s = append(s, trailing_breaks...)
2661				}
2662			} else {
2663				s = append(s, leading_break...)
2664				s = append(s, trailing_breaks...)
2665			}
2666			trailing_breaks = trailing_breaks[:0]
2667			leading_break = leading_break[:0]
2668		} else {
2669			s = append(s, whitespaces...)
2670			whitespaces = whitespaces[:0]
2671		}
2672	}
2673
2674	// Eat the right quote.
2675	skip(parser)
2676	end_mark := parser.mark
2677
2678	// Create a token.
2679	*token = yaml_token_t{
2680		typ:        yaml_SCALAR_TOKEN,
2681		start_mark: start_mark,
2682		end_mark:   end_mark,
2683		value:      s,
2684		style:      yaml_SINGLE_QUOTED_SCALAR_STYLE,
2685	}
2686	if !single {
2687		token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE
2688	}
2689	return true
2690}
2691
2692// Scan a plain scalar.
2693func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool {
2694
2695	var s, leading_break, trailing_breaks, whitespaces []byte
2696	var leading_blanks bool
2697	var indent = parser.indent + 1
2698
2699	start_mark := parser.mark
2700	end_mark := parser.mark
2701
2702	// Consume the content of the plain scalar.
2703	for {
2704		// Check for a document indicator.
2705		if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
2706			return false
2707		}
2708		if parser.mark.column == 0 &&
2709			((parser.buffer[parser.buffer_pos+0] == '-' &&
2710				parser.buffer[parser.buffer_pos+1] == '-' &&
2711				parser.buffer[parser.buffer_pos+2] == '-') ||
2712				(parser.buffer[parser.buffer_pos+0] == '.' &&
2713					parser.buffer[parser.buffer_pos+1] == '.' &&
2714					parser.buffer[parser.buffer_pos+2] == '.')) &&
2715			is_blankz(parser.buffer, parser.buffer_pos+3) {
2716			break
2717		}
2718
2719		// Check for a comment.
2720		if parser.buffer[parser.buffer_pos] == '#' {
2721			break
2722		}
2723
2724		// Consume non-blank characters.
2725		for !is_blankz(parser.buffer, parser.buffer_pos) {
2726
2727			// Check for indicators that may end a plain scalar.
2728			if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) ||
2729				(parser.flow_level > 0 &&
2730					(parser.buffer[parser.buffer_pos] == ',' ||
2731						parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' ||
2732						parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
2733						parser.buffer[parser.buffer_pos] == '}')) {
2734				break
2735			}
2736
2737			// Check if we need to join whitespaces and breaks.
2738			if leading_blanks || len(whitespaces) > 0 {
2739				if leading_blanks {
2740					// Do we need to fold line breaks?
2741					if leading_break[0] == '\n' {
2742						if len(trailing_breaks) == 0 {
2743							s = append(s, ' ')
2744						} else {
2745							s = append(s, trailing_breaks...)
2746						}
2747					} else {
2748						s = append(s, leading_break...)
2749						s = append(s, trailing_breaks...)
2750					}
2751					trailing_breaks = trailing_breaks[:0]
2752					leading_break = leading_break[:0]
2753					leading_blanks = false
2754				} else {
2755					s = append(s, whitespaces...)
2756					whitespaces = whitespaces[:0]
2757				}
2758			}
2759
2760			// Copy the character.
2761			s = read(parser, s)
2762
2763			end_mark = parser.mark
2764			if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2765				return false
2766			}
2767		}
2768
2769		// Is it the end?
2770		if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) {
2771			break
2772		}
2773
2774		// Consume blank characters.
2775		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2776			return false
2777		}
2778
2779		for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
2780			if is_blank(parser.buffer, parser.buffer_pos) {
2781
2782				// Check for tab characters that abuse indentation.
2783				if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) {
2784					yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
2785						start_mark, "found a tab character that violates indentation")
2786					return false
2787				}
2788
2789				// Consume a space or a tab character.
2790				if !leading_blanks {
2791					whitespaces = read(parser, whitespaces)
2792				} else {
2793					skip(parser)
2794				}
2795			} else {
2796				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2797					return false
2798				}
2799
2800				// Check if it is a first line break.
2801				if !leading_blanks {
2802					whitespaces = whitespaces[:0]
2803					leading_break = read_line(parser, leading_break)
2804					leading_blanks = true
2805				} else {
2806					trailing_breaks = read_line(parser, trailing_breaks)
2807				}
2808			}
2809			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2810				return false
2811			}
2812		}
2813
2814		// Check indentation level.
2815		if parser.flow_level == 0 && parser.mark.column < indent {
2816			break
2817		}
2818	}
2819
2820	// Create a token.
2821	*token = yaml_token_t{
2822		typ:        yaml_SCALAR_TOKEN,
2823		start_mark: start_mark,
2824		end_mark:   end_mark,
2825		value:      s,
2826		style:      yaml_PLAIN_SCALAR_STYLE,
2827	}
2828
2829	// Note that we change the 'simple_key_allowed' flag.
2830	if leading_blanks {
2831		parser.simple_key_allowed = true
2832	}
2833	return true
2834}
2835
2836func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t) bool {
2837	if parser.newlines > 0 {
2838		return true
2839	}
2840
2841	var start_mark yaml_mark_t
2842	var text []byte
2843
2844	for peek := 0; peek < 512; peek++ {
2845		if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
2846			break
2847		}
2848		if is_blank(parser.buffer, parser.buffer_pos+peek) {
2849			continue
2850		}
2851		if parser.buffer[parser.buffer_pos+peek] == '#' {
2852			seen := parser.mark.index + peek
2853			for {
2854				if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
2855					return false
2856				}
2857				if is_breakz(parser.buffer, parser.buffer_pos) {
2858					if parser.mark.index >= seen {
2859						break
2860					}
2861					if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
2862						return false
2863					}
2864					skip_line(parser)
2865				} else if parser.mark.index >= seen {
2866					if len(text) == 0 {
2867						start_mark = parser.mark
2868					}
2869					text = read(parser, text)
2870				} else {
2871					skip(parser)
2872				}
2873			}
2874		}
2875		break
2876	}
2877	if len(text) > 0 {
2878		parser.comments = append(parser.comments, yaml_comment_t{
2879			token_mark: token_mark,
2880			start_mark: start_mark,
2881			line:       text,
2882		})
2883	}
2884	return true
2885}
2886
// yaml_parser_scan_comments looks ahead from the current parser position for
// '#' comment lines, consumes them, and appends the result to parser.comments.
// Comment text judged to belong to the preceding content is recorded as a
// foot comment; whatever text is still pending when the scan stops is
// recorded as a head comment.
//
// scan_mark is stored as the scan_mark of the first comment recorded; once a
// foot comment has been split off it is replaced by the split position. The
// foot-line test is only armed when scan_mark.line > 0.
//
// The function returns false only when yaml_parser_update_buffer fails while
// a comment line is being consumed. A failed buffer update during lookahead
// merely ends the scan, and the function still returns true.
2887func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) bool {
	// Comments are positioned relative to the last queued token. Indexing
	// here requires parser.tokens to be non-empty.
2888	token := parser.tokens[len(parser.tokens)-1]
2889
	// If the last token is a flow entry and another token precedes it, use
	// the preceding token instead.
2890	if token.typ == yaml_FLOW_ENTRY_TOKEN && len(parser.tokens) > 1 {
2891		token = parser.tokens[len(parser.tokens)-2]
2892	}
2893
	// token_mark: mark stored with foot comments (initially the token start).
	// start_mark: position of the '#' that opened the pending comment text.
	// next_indent: parser.indent clamped to zero.
2894	var token_mark = token.start_mark
2895	var start_mark yaml_mark_t
2896	var next_indent = parser.indent
2897	if next_indent < 0 {
2898		next_indent = 0
2899	}
2900
	// recent_empty is set when a line break is peeked and cleared when a
	// comment line is found. first_empty starts true only when
	// parser.newlines <= 1 and is cleared at the first peeked line break.
2901	var recent_empty = false
2902	var first_empty = parser.newlines <= 1
2903
	// line and column follow the peeked byte, not the parser position.
	// column is incremented before each byte is examined, so column-1 is the
	// column of the byte at buffer_pos+peek.
2904	var line = parser.mark.line
2905	var column = parser.mark.column
2906
	// Pending comment text; multiple comment lines are joined with '\n'.
2907	var text []byte
2908
2909	// The foot line is the place where a comment must start to
2910	// still be considered as a foot of the prior content.
2911	// If there's some content in the currently parsed line, then
2912	// the foot is the line below it.
2913	var foot_line = -1
2914	if scan_mark.line > 0 {
2915		foot_line = parser.mark.line - parser.newlines + 1
2916		if parser.newlines == 0 && parser.mark.column > 1 {
2917			foot_line++
2918		}
2919	}
2920
	// Look ahead without consuming input, at most 512 bytes per stretch.
	// peek is reset each time a comment line is consumed further below.
2921	var peek = 0
2922	for ; peek < 512; peek++ {
		// Stop scanning (without reporting failure) if no more input can
		// be made available for the lookahead.
2923		if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
2924			break
2925		}
2926		column++
		// Blanks before a comment or line break are skipped.
2927		if is_blank(parser.buffer, parser.buffer_pos+peek) {
2928			continue
2929		}
2930		c := parser.buffer[parser.buffer_pos+peek]
		// A ']' or '}' while inside a flow context ends the comment scope.
2931		var close_flow = parser.flow_level > 0 && (c == ']' || c == '}')
2932		if close_flow || is_breakz(parser.buffer, parser.buffer_pos+peek) {
2933			// Got line break or terminator.
			// Only the first of a run of empty lines is acted upon, unless a
			// flow context is being closed.
2934			if close_flow || !recent_empty {
				// Since && binds tighter than ||, this reads as:
				//   close_flow ||
				//   (first_empty && ((start_mark.line == foot_line &&
				//                     token.typ != yaml_VALUE_TOKEN) ||
				//                    start_mark.column-1 < next_indent))
2935				if close_flow || first_empty && (start_mark.line == foot_line && token.typ != yaml_VALUE_TOKEN || start_mark.column-1 < next_indent) {
2936					// This is the first empty line and there were no empty lines before,
2937					// so this initial part of the comment is a foot of the prior token
2938					// instead of being a head for the following one. Split it up.
2939					// Alternatively, this might also be the last comment inside a flow
2940					// scope, so it must be a footer.
2941					if len(text) > 0 {
2942						if start_mark.column-1 < next_indent {
2943							// If dedented it's unrelated to the prior token.
2944							token_mark = start_mark
2945						}
2946						parser.comments = append(parser.comments, yaml_comment_t{
2947							scan_mark:  scan_mark,
2948							token_mark: token_mark,
2949							start_mark: start_mark,
2950							end_mark:   yaml_mark_t{parser.mark.index + peek, line, column},
2951							foot:       text,
2952						})
						// Anything collected after this point is a new
						// comment that starts at the split position.
2953						scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
2954						token_mark = scan_mark
2955						text = nil
2956					}
2957				} else {
					// Not a foot: record the line break in the pending text,
					// except when the peeked byte is NUL.
2958					if len(text) > 0 && parser.buffer[parser.buffer_pos+peek] != 0 {
2959						text = append(text, '\n')
2960					}
2961				}
2962			}
			// Anything that got here without being an actual line break
			// (a flow closer or the terminator) ends the scan.
2963			if !is_break(parser.buffer, parser.buffer_pos+peek) {
2964				break
2965			}
			// A line break was peeked: move the tracked position to the
			// start of the following line.
2966			first_empty = false
2967			recent_empty = true
2968			column = 0
2969			line++
2970			continue
2971		}
2972
		// NOTE: close_flow is always false here, because the branch above
		// ends in break or continue whenever it is set. Only the dedent
		// test can trigger this split.
2973		if len(text) > 0 && (close_flow || column-1 < next_indent && column != start_mark.column) {
2974			// The comment at the different indentation is a foot of the
2975			// preceding data rather than a head of the upcoming one.
2976			parser.comments = append(parser.comments, yaml_comment_t{
2977				scan_mark:  scan_mark,
2978				token_mark: token_mark,
2979				start_mark: start_mark,
2980				end_mark:   yaml_mark_t{parser.mark.index + peek, line, column},
2981				foot:       text,
2982			})
2983			scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
2984			token_mark = scan_mark
2985			text = nil
2986		}
2987
		// Any non-blank, non-break byte other than '#' is content, which
		// ends the comment scan.
2988		if parser.buffer[parser.buffer_pos+peek] != '#' {
2989			break
2990		}
2991
		// First line of a new comment: remember where it starts. Otherwise
		// separate this line from the previously collected one.
2992		if len(text) == 0 {
2993			start_mark = yaml_mark_t{parser.mark.index + peek, line, column}
2994		} else {
2995			text = append(text, '\n')
2996		}
2997
2998		recent_empty = false
2999
3000		// Consume until after the consumed comment line.
		// Everything before the '#' (index seen) is skipped, including
		// whole line breaks. From the '#' on, bytes are read into text until
		// the next break or terminator, which is left unconsumed.
3001		seen := parser.mark.index + peek
3002		for {
3003			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
3004				return false
3005			}
3006			if is_breakz(parser.buffer, parser.buffer_pos) {
3007				if parser.mark.index >= seen {
3008					break
3009				}
				// skip_line is called with two bytes available, presumably
				// so that a two-byte break can be skipped at once.
3010				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
3011					return false
3012				}
3013				skip_line(parser)
3014			} else if parser.mark.index >= seen {
3015				text = read(parser, text)
3016			} else {
3017				skip(parser)
3018			}
3019		}
3020
		// Restart the lookahead relative to the new parser position. The
		// parser now sits on the break or terminator that ended the comment
		// line. The loop's post statement bumps peek to 1, so that byte is
		// not examined again.
		// NOTE(review): line is not advanced for that skipped break, so it
		// appears to lag by one for later marks; confirm this is intended.
3021		peek = 0
3022		column = 0
3023		line = parser.mark.line
		// Re-read the indentation level in the same clamped form as above.
3024		next_indent = parser.indent
3025		if next_indent < 0 {
3026			next_indent = 0
3027		}
3028	}
3029
	// Text still pending was not classified as a foot, so it is recorded as
	// a head comment whose token_mark is its own start.
3030	if len(text) > 0 {
3031		parser.comments = append(parser.comments, yaml_comment_t{
3032			scan_mark:  scan_mark,
3033			token_mark: start_mark,
3034			start_mark: start_mark,
3035			end_mark:   yaml_mark_t{parser.mark.index + peek - 1, line, column},
3036			head:       text,
3037		})
3038	}
3039	return true
3040}