summary refs log tree commit diff
path: root/man/10/rune
blob: 8c3b6324d60b48898bd6c1c5e899f14da7325359 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
.TH RUNE 10.2
.SH NAME
runetochar, chartorune, runelen, fullrune, utflen, utfrune, utfrrune, utfutf \- rune/UTF conversion
.SH SYNOPSIS
.ta \w'\fLchar*xx'u
.PP
.B
int	runetochar(char *s, Rune *r)
.PP
.B
int	chartorune(Rune *r, char *s)
.PP
.B
int	runelen(long r)
.PP
.B
int	fullrune(char *s, int n)
.PP
.B
int	utflen(char *s)
.PP
.B
char*	utfrune(char *s, long c)
.PP
.B
char*	utfrrune(char *s, long c)
.PP
.B
char*	utfutf(char *s1, char *s2)
.SH DESCRIPTION
These routines convert between a
.SM UTF
byte stream and runes.
.PP
.I Runetochar
copies one rune at
.I r
to at most
.B UTFmax
bytes starting at
.I s
and returns the number of bytes copied.
.BR UTFmax ,
defined as
.B 3
in
.BR <libc.h> ,
is the maximum number of bytes required to represent a rune.
.PP
.I Chartorune
copies at most
.B UTFmax
bytes starting at
.I s
to one rune at
.I r
and returns the number of bytes copied.
If the input is not exactly in
.SM UTF
format,
.I chartorune
will convert it to the rune 0x80 and return 1.
.PP
.I Runelen
returns the number of bytes
required to convert
.I r
into
.SM UTF.
.PP
.I Fullrune
returns 1 if the string
.I s
of length
.I n
is long enough to be decoded by
.I chartorune
and 0 otherwise.
This does not guarantee that the string
contains a legal
.SM UTF
encoding.
This routine is used by programs that
obtain input a byte at
a time and need to know when a full rune
has arrived.
.PP
The following routines are analogous to the
corresponding string routines with
.B utf
substituted for
.B str
and
.B rune
substituted for
.BR chr .
.PP
.I Utflen
returns the number of runes that
are represented by the
.SM UTF
string
.IR s .
.PP
.I Utfrune
.RI ( utfrrune )
returns a pointer to the first (last)
occurrence of rune
.I c
in the
.SM UTF
string
.IR s ,
or 0 if
.I c
does not occur in the string.
The NUL byte terminating a string is considered to
be part of the string
.IR s .
.PP
.I Utfutf
returns a pointer to the first occurrence of
the
.SM UTF
string
.I s2
as a
.SM UTF
substring of
.IR s1 ,
or 0 if there is none.
If
.I s2
is the null string,
.I utfutf
returns
.IR s1 .
.SH SOURCE
.B /libkern/rune.c
.br
.B /libkern/runestrlen.c
.br
.B /libkern/utflen.c
.br
.B /libkern/utfrrune.c
.br
.B /libkern/utfrune.c
.SH SEE ALSO
.IR convcs (2),
.IR utf (6)