string_helpers.cpp 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. // Part of the Carbon Language project, under the Apache License v2.0 with LLVM
  2. // Exceptions. See /LICENSE for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "common/string_helpers.h"
  5. #include "common/check.h"
  6. #include "llvm/ADT/StringExtras.h"
  7. namespace Carbon {
  // Converts a single hexadecimal digit character into its numeric value.
  //
  // Carbon only takes uppercase hex input, so the accepted characters are
  // `0`-`9` (yielding 0-9) and `A`-`F` (yielding 10-15). Every other
  // character, including lowercase `a`-`f`, yields `std::nullopt`.
  static auto FromHex(char c) -> std::optional<char> {
    std::optional<char> digit_value;
    if ('0' <= c && c <= '9') {
      digit_value = static_cast<char>(c - '0');
    } else if ('A' <= c && c <= 'F') {
      digit_value = static_cast<char>(c - 'A' + 10);
    }
    // Still empty here when `c` was not a recognized hex digit.
    return digit_value;
  }
  18. auto UnescapeStringLiteral(llvm::StringRef source)
  19. -> std::optional<std::string> {
  20. std::string ret;
  21. ret.reserve(source.size());
  22. size_t i = 0;
  23. while (i < source.size()) {
  24. char c = source[i];
  25. switch (c) {
  26. case '\\':
  27. ++i;
  28. if (i == source.size()) {
  29. return std::nullopt;
  30. }
  31. switch (source[i]) {
  32. case 'n':
  33. ret.push_back('\n');
  34. break;
  35. case 'r':
  36. ret.push_back('\r');
  37. break;
  38. case 't':
  39. ret.push_back('\t');
  40. break;
  41. case '0':
  42. if (i + 1 < source.size() && llvm::isDigit(source[i + 1])) {
  43. // \0[0-9] is reserved.
  44. return std::nullopt;
  45. }
  46. ret.push_back('\0');
  47. break;
  48. case '"':
  49. ret.push_back('"');
  50. break;
  51. case '\'':
  52. ret.push_back('\'');
  53. break;
  54. case '\\':
  55. ret.push_back('\\');
  56. break;
  57. case 'x': {
  58. i += 2;
  59. if (i >= source.size()) {
  60. return std::nullopt;
  61. }
  62. std::optional<char> c1 = FromHex(source[i - 1]);
  63. std::optional<char> c2 = FromHex(source[i]);
  64. if (c1 == std::nullopt || c2 == std::nullopt) {
  65. return std::nullopt;
  66. }
  67. ret.push_back(16 * *c1 + *c2);
  68. break;
  69. }
  70. case 'u':
  71. FATAL() << "\\u is not yet supported in string literals";
  72. default:
  73. // Unsupported.
  74. return std::nullopt;
  75. }
  76. break;
  77. case '\t':
  78. // Disallow non-` ` horizontal whitespace:
  79. // https://github.com/carbon-language/carbon-lang/blob/trunk/docs/design/lexical_conventions/whitespace.md
  80. // TODO: This doesn't handle unicode whitespace.
  81. return std::nullopt;
  82. default:
  83. ret.push_back(c);
  84. break;
  85. }
  86. ++i;
  87. }
  88. return ret;
  89. }
  90. } // namespace Carbon