Download raw body.
finger: handle UTF-8 characters in .plan and .project
Hey,
I noticed that finger(1) didn't show non-ASCII UTF-8 characters
correctly. Here's a patch that fixes that, mostly lifted from
usr.bin/less/line.c. Correct sequences and control sequences are
well-tested; handling of incorrect sequences is only lightly
tested.
- Correct sequence: printf '\xc3\xa1\n' >.plan . Will render a U+00E1,
LATIN SMALL LETTER A WITH ACUTE.
- Incorrect sequence: printf '\xdf\xdf\xdf\xdf\xdf\xdf\xdf\xdf' >.plan
will render 8 underscores _.
btw, is there a reason why the files are read one char at a time?
Should it be replaced with getline?
Lucas
diff refs/heads/master ca749c632f5b1a52dc18ecfd675d1cfa7d153a2e
commit - ed8f5e8d82ace15e4cefca2c82941b15cb1a7830
commit + ca749c632f5b1a52dc18ecfd675d1cfa7d153a2e
blob - 11d5295c613f7ce1ebf6106d00ee8043e3e64ef1
blob + 9bed5bc14d805cb5e9f4d4d21015279695e9b63c
--- usr.bin/finger/finger.c
+++ usr.bin/finger/finger.c
@@ -63,6 +63,7 @@
#include <time.h>
#include <unistd.h>
#include <limits.h>
+#include <locale.h>
#include <err.h>
#include "finger.h"
#include "extern.h"
@@ -82,6 +83,8 @@ main(int argc, char *argv[])
char domain[HOST_NAME_MAX+1];
struct stat sb;
+ setlocale(LC_ALL, "");
+
oflag = 1; /* default to old "office" behavior */
while ((ch = getopt(argc, argv, "lmMpsho")) != -1)
blob - 03de1aa5cf3e7123cab82b51b222e3e1f0f27a64
blob + baeaf87b87c826f89c764645f36ef02d41f49735
--- usr.bin/finger/lprint.c
+++ usr.bin/finger/lprint.c
@@ -38,13 +38,15 @@
#include <ctype.h>
#include <paths.h>
#include <vis.h>
+#include <wchar.h>
#include "finger.h"
#include "extern.h"
-#define LINE_LEN 80
-#define TAB_LEN 8 /* 8 spaces between tabs */
-#define _PATH_PLAN ".plan"
-#define _PATH_PROJECT ".project"
+#define LINE_LEN 80
+#define MAX_UTF_CHAR_LEN 6
+#define TAB_LEN 8 /* 8 spaces between tabs */
+#define _PATH_PLAN ".plan"
+#define _PATH_PROJECT ".project"
void
lflag_print(void)
@@ -252,6 +254,10 @@ demi_print(char *str, int oddfield)
int
show_text(char *directory, char *file_name, char *header)
{
+ mbstate_t mbs;
+ char mbbuf[MAX_UTF_CHAR_LEN];
+ size_t i, mbidx, sz;
+ wchar_t wc;
int ch, lastc;
FILE *fp;
@@ -260,8 +266,40 @@ show_text(char *directory, char *file_name, char *head
if ((fp = fopen(tbuf, "r")) == NULL)
return (0);
(void)printf("%s\n", header);
- while ((ch = getc(fp)) != EOF)
- vputc(lastc = ch);
+ mbidx = 0;
+ while ((ch = getc(fp)) != EOF) {
+ lastc = ch;
+ mbbuf[mbidx++] = ch;
+ memset(&mbs, 0, sizeof(mbs));
+ sz = mbrtowc(&wc, mbbuf, mbidx, &mbs);
+
+ /* Incomplete UTF-8 sequence. */
+ if (sz == (size_t)-2)
+ continue;
+
+ /* Complete UTF-8 sequence. */
+ if (sz != (size_t)-1) {
+ if (sz > 1)
+ (void)putwchar(wc);
+ else
+ vputc(ch);
+ mbidx = 0;
+ continue;
+ }
+
+ /*
+ * Invalid UTF-8 sequence. vis the first buffered char, advance
+ * the buffer one char and retry.
+ */
+ vputc(mbbuf[0]);
+ memmove(mbbuf, mbbuf + 1, mbidx - 1);
+ mbidx--;
+ }
+
+ /* If the UTF-8 sequence is incomplete, vis all the buffered chars. */
+ for (i = 0; i < mbidx; i++)
+ vputc(lastc = mbbuf[i]);
+
if (lastc != '\n')
(void)putchar('\n');
(void)fclose(fp);
finger: handle UTF-8 characters in .plan and .project