fail_char_literals_bad_encoding.carbon 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. //
  5. // AUTOUPDATE
  6. // TIP: To test this file alone, run:
  7. // TIP: bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/lex/testdata/fail_char_literals_bad_encoding.carbon
  8. // TIP: To dump output, run:
  9. // TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/lex/testdata/fail_char_literals_bad_encoding.carbon
  10. // CHECK:STDOUT: - filename: fail_char_literals_bad_encoding.carbon
  11. // CHECK:STDOUT: tokens:
  12. // Be careful when operating on this file: it contains invalid UTF-8 sequences
  13. // and Unicode control characters, and text editors are likely to corrupt it.
  14. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: control character in character literal; specify as escape sequence `\u{00}` [CharLiteralControlCharacter]
  15. // CHECK:STDERR: '<00>'
  16. // CHECK:STDERR: ^
  17. // CHECK:STDERR:
  18. '[[@0x00]]'
  19. // CHECK:STDOUT: - { index: 1, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\x00'", has_leading_space: true }
  20. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: control character in character literal; specify as escape sequence `\u{01}` [CharLiteralControlCharacter]
  21. // CHECK:STDERR: '<01>'
  22. // CHECK:STDERR: ^
  23. // CHECK:STDERR:
  24. '[[@0x01]]'
  25. // CHECK:STDOUT: - { index: 2, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\x01'", has_leading_space: true }
  26. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: control character in character literal; specify as escape sequence `\u{1F}` [CharLiteralControlCharacter]
  27. // CHECK:STDERR: '<1F>'
  28. // CHECK:STDERR: ^
  29. // CHECK:STDERR:
  30. '[[@0x1F]]'
  31. // CHECK:STDOUT: - { index: 3, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\x1F'", has_leading_space: true }
  32. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: control character in character literal; specify as escape sequence `\u{7F}` [CharLiteralControlCharacter]
  33. // CHECK:STDERR: '<7F>'
  34. // CHECK:STDERR: ^
  35. // CHECK:STDERR:
  36. '[[@0x7F]]'
  37. // CHECK:STDOUT: - { index: 4, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\x7F'", has_leading_space: true }
  38. // 0xC2 0x80 is U+0080, the first C1 control character.
  39. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: control character in character literal; specify as escape sequence `\u{80}` [CharLiteralControlCharacter]
  40. // CHECK:STDERR: '<C2><80>'
  41. // CHECK:STDERR: ^
  42. // CHECK:STDERR:
  43. '[[@0xC2]][[@0x80]]'
  44. // CHECK:STDOUT: - { index: 5, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\xC2\x80'", has_leading_space: true }
  45. // 0xC2 0x9F is U+009F, the last C1 control character.
  46. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: control character in character literal; specify as escape sequence `\u{9F}` [CharLiteralControlCharacter]
  47. // CHECK:STDERR: '<C2><9F>'
  48. // CHECK:STDERR: ^
  49. // CHECK:STDERR:
  50. '[[@0xC2]][[@0x9F]]'
  51. // CHECK:STDOUT: - { index: 6, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\xC2\x9F'", has_leading_space: true }
  52. // 0xC3 is invalid UTF-8 due to having a lead byte with no trail byte.
  53. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: incomplete UTF-8 [CharLiteralUnderflow]
  54. // CHECK:STDERR: '<C3>'
  55. // CHECK:STDERR: ^
  56. // CHECK:STDERR:
  57. '[[@0xC3]]'
  58. // CHECK:STDOUT: - { index: 7, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\xC3'", has_leading_space: true }
  59. // 0xC3 0xC3 is invalid UTF-8 due to having two lead bytes in a row.
  60. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: invalid UTF-8 character [CharLiteralInvalidUTF8]
  61. // CHECK:STDERR: '<C3><C3>'
  62. // CHECK:STDERR: ^
  63. // CHECK:STDERR:
  64. '[[@0xC3]][[@0xC3]]'
  65. // CHECK:STDOUT: - { index: 8, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\xC3\xC3'", has_leading_space: true }
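  66. // 0xFF followed by eight 0x80 bytes is invalid UTF-8 due to being too large: 0xFF is never a valid lead byte, and no UTF-8 sequence is longer than four bytes.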
  67. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: invalid UTF-8 character [CharLiteralInvalidUTF8]
  68. // CHECK:STDERR: '<FF><80><80><80><80><80><80><80><80>'
  69. // CHECK:STDERR: ^
  70. // CHECK:STDERR:
  71. '[[@0xFF]][[@0x80]][[@0x80]][[@0x80]][[@0x80]][[@0x80]][[@0x80]][[@0x80]][[@0x80]]'
  72. // CHECK:STDOUT: - { index: 9, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\xFF\x80\x80\x80\x80\x80\x80\x80\x80'", has_leading_space: true }
  73. // 0xED 0xA0 0x80 would be U+D800 but is invalid UTF-8 due to encoding a high surrogate value.
  74. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: invalid UTF-8 character [CharLiteralInvalidUTF8]
  75. // CHECK:STDERR: '<ED><A0><80>'
  76. // CHECK:STDERR: ^
  77. // CHECK:STDERR:
  78. '[[@0xED]][[@0xA0]][[@0x80]]'
  79. // CHECK:STDOUT: - { index: 10, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\xED\xA0\x80'", has_leading_space: true }
  80. // 0xED 0xBF 0xBF would be U+DFFF but is invalid UTF-8 due to encoding a low surrogate value.
  81. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: invalid UTF-8 character [CharLiteralInvalidUTF8]
  82. // CHECK:STDERR: '<ED><BF><BF>'
  83. // CHECK:STDERR: ^
  84. // CHECK:STDERR:
  85. '[[@0xED]][[@0xBF]][[@0xBF]]'
  86. // CHECK:STDOUT: - { index: 11, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\xED\xBF\xBF'", has_leading_space: true }
  87. // 0xED 0xA0 0xBD 0xED 0xBA 0x80 would be U+D83D U+DE80, but those are both surrogates.
  88. // Treated as UTF-16, that pair would in turn produce U+1F680, but surrogates encoded as UTF-8 are rejected rather than combined.
  89. // CHECK:STDERR: fail_char_literals_bad_encoding.carbon:[[@LINE+4]]:1: error: invalid UTF-8 character [CharLiteralInvalidUTF8]
  90. // CHECK:STDERR: '<ED><A0><BD><ED><BA><80>'
  91. // CHECK:STDERR: ^
  92. // CHECK:STDERR:
  93. '[[@0xED]][[@0xA0]][[@0xBD]][[@0xED]][[@0xBA]][[@0x80]]'
  94. // CHECK:STDOUT: - { index: 12, kind: "Error", line: {{ *}}[[@LINE-1]], column: 1, indent: 1, spelling: "'\xED\xA0\xBD\xED\xBA\x80'", has_leading_space: true }