Merge 9391030e6e into b7eefa1996

pretty-print: don't use OSC 8 for incompatible URLs
string-util: split out EOT check in strip_tab_ansi()
2024-11-20 23:00:52 -08:00 · 2024-11-19 10:55:51 +01:00 · 2024-11-19 10:55:51 +01:00
3 changed files with 33 additions and 9 deletions
--- a/src/basic/string-util.c
+++ b/src/basic/string-util.c
@ -732,10 +732,12 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {

        for (const char *i = *ibuf; i < *ibuf + isz + 1; i++) {

+                bool eot = i >= *ibuf + isz;
+
                switch (state) {

                case STATE_OTHER:
-                        if (i >= *ibuf + isz) /* EOT */
+                        if (eot)
                                break;

                        if (*i == '\r') {
@ -760,7 +762,7 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
                case STATE_ESCAPE:
                        assert(n_carriage_returns == 0);

-                        if (i >= *ibuf + isz) { /* EOT */
+                        if (eot) {
                                fputc('\x1B', f);
                                advance_offsets(i - *ibuf, highlight, shift, 1);
                                break;
@ -782,8 +784,7 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
                case STATE_CSI:
                        assert(n_carriage_returns == 0);

-                        if (i >= *ibuf + isz || /* EOT … */
-                            !strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
+                        if (eot || !strchr("01234567890;m", *i)) { /* EOT or invalid chars in sequence */
                                fputc('\x1B', f);
                                fputc('[', f);
                                advance_offsets(i - *ibuf, highlight, shift, 2);
@ -800,8 +801,7 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
                        /* There are three kinds of OSC terminators: \x07, \x1b\x5c or \x9c. We only support
                         * the first two, because the last one is a valid UTF-8 codepoint and hence creates
                         * an ambiguity (many Terminal emulators refuse to support it as well). */
-                        if (i >= *ibuf + isz || /* EOT … */
-                            (!IN_SET(*i, '\x07', '\x1b') && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
+                        if (eot || (!IN_SET(*i, '\x07', '\x1b') && !osc_char_is_valid(*i))) { /* EOT or invalid chars in sequence */
                                fputc('\x1B', f);
                                fputc(']', f);
                                advance_offsets(i - *ibuf, highlight, shift, 2);
@ -815,8 +815,7 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
                        break;

                case STATE_OSC_CLOSING:
-                        if (i >= *ibuf + isz || /* EOT … */
-                            *i != '\x5c') { /* … or incomplete two-byte ST in sequence */
+                        if (eot || *i != '\x5c') { /* EOT or incomplete two-byte ST in sequence */
                                fputc('\x1B', f);
                                fputc(']', f);
                                advance_offsets(i - *ibuf, highlight, shift, 2);
--- a/src/basic/terminal-util.h
+++ b/src/basic/terminal-util.h
@ -157,3 +157,9 @@ int terminal_is_pty_fd(int fd);

 int pty_open_peer_racefree(int fd, int mode);
 int pty_open_peer(int fd, int mode);
+
+static inline bool osc_char_is_valid(char c) {
+        /* Returns true iff c is a printable ASCII byte (32..126), i.e. safe to include inside an ANSI OSC sequence
+         * as per ECMA-48 5th edition, section 8.3.89. The casts keep the result independent of char signedness. */
+        return (unsigned char) c >= 32U && (unsigned char) c < 127;
+}
--- a/src/shared/pretty-print.c
+++ b/src/shared/pretty-print.c
@ -76,6 +76,25 @@ bool urlify_enabled(void) {
 #endif
 }

+static bool url_suitable_for_osc8(const char *url) {
+        assert(url);

+        /* Returns true if the URL may be embedded in an OSC 8 sequence, false if it is too long or contains a
+         * character rejected by osc_char_is_valid(). The URL must not be NULL, see the assert() above. */

+        /* Reject URLs longer than 2000 bytes, as counted by strlen(), and don't try to do OSC 8 for them. As per
+         * recommendation in https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda#length-limits */
+        if (strlen(url) > 2000)
+                return false;

+        /* Every byte before the terminating NUL must lie in the 32..126 range OSC sequences allow, as per ECMA-48 */
+        for (const char *c = url; *c; c++)
+                if (!osc_char_is_valid(*c))
+                        return false;

+        return true;
+}
+
 int terminal_urlify(const char *url, const char *text, char **ret) {
        char *n;

@ -87,7 +106,7 @@ int terminal_urlify(const char *url, const char *text, char **ret) {
        if (isempty(text))
                text = url;

-        if (urlify_enabled())
+        if (urlify_enabled() && url_suitable_for_osc8(url))
                n = strjoin(ANSI_OSC "8;;", url, ANSI_ST,
                            text,
                            ANSI_OSC "8;;" ANSI_ST);
Author	SHA1	Message	Date
Lennart Poettering	8a112f735f	Merge `9391030e6e` into `b7eefa1996`	2024-11-20 23:00:52 -08:00
Lennart Poettering	9391030e6e	pretty-print: don't use OSC 8 for incompatible URLs	2024-11-19 10:55:51 +01:00
Lennart Poettering	302e8bbce3	string-util: split out EOT check in strip_tab_ansi() Let's unify the eot check in one place in order to make things more readable.	2024-11-19 10:55:51 +01:00