Compare commits

...

3 Commits

Author SHA1 Message Date
Lennart Poettering 2f38ead984
Merge 9391030e6e into 4b356c90dc 2024-11-23 10:41:12 -08:00
Lennart Poettering 9391030e6e pretty-print: don't use OSC 8 for incompatible URLs 2024-11-19 10:55:51 +01:00
Lennart Poettering 302e8bbce3 string-util: split out EOT check in strip_tab_ansi()
Let's unify the EOT check in one place in order to make things more
readable.
2024-11-19 10:55:51 +01:00
3 changed files with 33 additions and 9 deletions

View File

@ -732,10 +732,12 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
for (const char *i = *ibuf; i < *ibuf + isz + 1; i++) {
bool eot = i >= *ibuf + isz;
switch (state) {
case STATE_OTHER:
if (i >= *ibuf + isz) /* EOT */
if (eot)
break;
if (*i == '\r') {
@ -760,7 +762,7 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
case STATE_ESCAPE:
assert(n_carriage_returns == 0);
if (i >= *ibuf + isz) { /* EOT */
if (eot) {
fputc('\x1B', f);
advance_offsets(i - *ibuf, highlight, shift, 1);
break;
@ -782,8 +784,7 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
case STATE_CSI:
assert(n_carriage_returns == 0);
if (i >= *ibuf + isz || /* EOT … */
!strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
if (eot || !strchr("01234567890;m", *i)) { /* EOT or invalid chars in sequence */
fputc('\x1B', f);
fputc('[', f);
advance_offsets(i - *ibuf, highlight, shift, 2);
@ -800,8 +801,7 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
/* There are three kinds of OSC terminators: \x07, \x1b\x5c or \x9c. We only support
* the first two, because the last one is a valid UTF-8 codepoint and hence creates
* an ambiguity (many Terminal emulators refuse to support it as well). */
if (i >= *ibuf + isz || /* EOT … */
(!IN_SET(*i, '\x07', '\x1b') && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
if (eot || (!IN_SET(*i, '\x07', '\x1b') && !osc_char_is_valid(*i))) { /* EOT or invalid chars in sequence */
fputc('\x1B', f);
fputc(']', f);
advance_offsets(i - *ibuf, highlight, shift, 2);
@ -815,8 +815,7 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
break;
case STATE_OSC_CLOSING:
if (i >= *ibuf + isz || /* EOT … */
*i != '\x5c') { /* … or incomplete two-byte ST in sequence */
if (eot || *i != '\x5c') { /* EOT or incomplete two-byte ST in sequence */
fputc('\x1B', f);
fputc(']', f);
advance_offsets(i - *ibuf, highlight, shift, 2);

View File

@ -157,3 +157,9 @@ int terminal_is_pty_fd(int fd);
int pty_open_peer_racefree(int fd, int mode);
int pty_open_peer(int fd, int mode);
static inline bool osc_char_is_valid(char c) {
        /* Tells whether a single byte may safely appear inside an ANSI OSC sequence (see ECMA-48 5th
         * edition, section 8.3.89). Only the range 32..126 is accepted, i.e. space through '~'.
         *
         * The byte is inspected as unsigned char so that values >= 0x80 are rejected regardless of
         * whether plain 'char' is signed on the target platform. */
        const unsigned char u = (unsigned char) c;

        return u > 31U && u <= 126U;
}

View File

@ -76,6 +76,25 @@ bool urlify_enabled(void) {
#endif
}
static bool url_suitable_for_osc8(const char *url) {
        size_t len;

        assert(url);

        /* Decides whether a URL can be embedded in an OSC 8 hyperlink sequence. Two restrictions apply:
         * an upper bound on the length, and a restricted character set. Returns true only if both hold. */

        /* Length restriction: anything longer than 2000 bytes is rejected, following the recommendation
         * in https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda#length-limits */
        len = strlen(url);
        if (len > 2000)
                return false;

        /* Charset restriction: per ECMA-48 an OSC sequence may only carry bytes from the 32..126 range,
         * hence a single byte outside of it disqualifies the whole URL. */
        for (size_t k = 0; k < len; k++) {
                if (osc_char_is_valid(url[k]))
                        continue;

                return false;
        }

        return true;
}
int terminal_urlify(const char *url, const char *text, char **ret) {
char *n;
@ -87,7 +106,7 @@ int terminal_urlify(const char *url, const char *text, char **ret) {
if (isempty(text))
text = url;
if (urlify_enabled())
if (urlify_enabled() && url_suitable_for_osc8(url))
n = strjoin(ANSI_OSC "8;;", url, ANSI_ST,
text,
ANSI_OSC "8;;" ANSI_ST);