libunibreak  4.1
linebreakdef.h
Go to the documentation of this file.
1 /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 
3 /*
4  * Line breaking in a Unicode sequence. Designed to be used in a
5  * generic text renderer.
6  *
7  * Copyright (C) 2008-2018 Wu Yongwei <wuyongwei at gmail dot com>
8  * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
9  *
10  * This software is provided 'as-is', without any express or implied
11  * warranty. In no event will the author be held liable for any damages
12  * arising from the use of this software.
13  *
14  * Permission is granted to anyone to use this software for any purpose,
15  * including commercial applications, and to alter it and redistribute
16  * it freely, subject to the following restrictions:
17  *
18  * 1. The origin of this software must not be misrepresented; you must
19  * not claim that you wrote the original software. If you use this
20  * software in a product, an acknowledgement in the product
21  * documentation would be appreciated but is not required.
22  * 2. Altered source versions must be plainly marked as such, and must
23  * not be misrepresented as being the original software.
24  * 3. This notice may not be removed or altered from any source
25  * distribution.
26  *
27  * The main reference is Unicode Standard Annex 14 (UAX #14):
28  * <URL:http://www.unicode.org/reports/tr14/>
29  *
30  * When this library was designed, this annex was at Revision 19, for
31  * Unicode 5.0.0:
32  * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
33  *
34  * This library has been updated according to Revision 43, for
35  * Unicode 12.0.0:
36  * <URL:http://www.unicode.org/reports/tr14/tr14-43.html>
37  *
38  * The Unicode Terms of Use are available at
39  * <URL:http://www.unicode.org/copyright.html>
40  */
41 
52 #include "unibreakdef.h"
53 
59 {
60  /* This is used to signal an error condition. */
63  /* The following break classes are treated in the pair table. */
97  /* The following break class is treated in the pair table, but it is
98  * not part of Table 2 of UAX #14-37. */
101  /* The following break classes are not treated in the pair table */
112 };
113 
119 {
123 };
124 
130 {
131  const char *lang;
132  size_t namelen;
133  const struct LineBreakProperties *lbp;
134 };
135 
141 {
142  const char *lang;
143  const struct LineBreakProperties *lbpLang;
148  bool fLb8aZwj;
151  int cLb30aRI;
152 };
153 
154 /* Declarations */
155 extern const struct LineBreakProperties lb_prop_default[];
156 extern const struct LineBreakPropertiesLang lb_prop_lang_map[];
157 
158 /* Function Prototype */
160  struct LineBreakContext *lbpCtx,
161  utf32_t ch,
162  const char *lang);
164  struct LineBreakContext *lbpCtx,
165  utf32_t ch);
166 void set_linebreaks(
167  const void *s,
168  size_t len,
169  const char *lang,
170  char *brks,
171  get_next_char_t get_next_char);
Zero width joiner.
Definition: linebreakdef.h:95
Carriage return.
Definition: linebreakdef.h:105
bool fLb10LeadSpace
Flag for leading space (LB10)
Definition: linebreakdef.h:149
Struct for association of language-specific line breaking properties with language names.
Definition: linebreakdef.h:129
bool fLb8aZwj
Flag for ZWJ (LB8a)
Definition: linebreakdef.h:148
Contingent break.
Definition: linebreakdef.h:99
Ideographic.
Definition: linebreakdef.h:78
Break on either side (but not pair)
Definition: linebreakdef.h:83
Surrogates.
Definition: linebreakdef.h:109
Hangul LV.
Definition: linebreakdef.h:87
enum LineBreakClass prop
The line breaking property.
Definition: linebreakdef.h:122
Infix separator.
Definition: linebreakdef.h:72
Struct for entries of line break properties.
Definition: linebreakdef.h:118
Opening punctuation.
Definition: linebreakdef.h:64
size_t namelen
Length of name to match.
Definition: linebreakdef.h:132
Postfix.
Definition: linebreakdef.h:74
const char * lang
Language name.
Definition: linebreakdef.h:131
Emoji modifier.
Definition: linebreakdef.h:94
LineBreakClass
Line break classes.
Definition: linebreakdef.h:58
int lb_process_next_char(struct LineBreakContext *lbpCtx, utf32_t ch)
Updates the LineBreakContext for the next codepoint and returns the detected break.
Definition: linebreak.c:694
enum LineBreakClass lbcCur
Breaking class of current codepoint.
Definition: linebreakdef.h:145
Exclamation/Interrogation.
Definition: linebreakdef.h:70
Conditional Japanese starter.
Definition: linebreakdef.h:104
Numeric.
Definition: linebreakdef.h:75
Break after.
Definition: linebreakdef.h:81
Unknown.
Definition: linebreakdef.h:111
unsigned int utf32_t
Type for UTF-32 data points.
Definition: unibreakbase.h:49
utf32_t start
Start codepoint.
Definition: linebreakdef.h:120
Regional indicator.
Definition: linebreakdef.h:92
const struct LineBreakProperties * lbpLang
Pointer to LineBreakProperties.
Definition: linebreakdef.h:143
enum LineBreakClass lbcNew
Breaking class of next codepoint.
Definition: linebreakdef.h:146
South-East Asian.
Definition: linebreakdef.h:108
Closing parenthesis.
Definition: linebreakdef.h:66
Closing punctuation.
Definition: linebreakdef.h:65
utf32_t end
End codepoint, inclusive.
Definition: linebreakdef.h:121
const struct LineBreakProperties * lbp
Pointer to associated data.
Definition: linebreakdef.h:133
Alphabetic.
Definition: linebreakdef.h:76
Ambiguous (alphabetic or ideograph)
Definition: linebreakdef.h:102
void lb_init_break_context(struct LineBreakContext *lbpCtx, utf32_t ch, const char *lang)
Initializes line breaking context for a given language.
Definition: linebreak.c:663
Undefined.
Definition: linebreakdef.h:61
const struct LineBreakProperties lb_prop_default[]
Default line breaking properties as from the Unicode Web site.
Definition: linebreakdata.c:9
Word joiner.
Definition: linebreakdef.h:86
Line feed.
Definition: linebreakdef.h:106
void set_linebreaks(const void *s, size_t len, const char *lang, char *brks, get_next_char_t get_next_char)
Sets the line breaking information for a generic input string.
Definition: linebreak.c:767
Hangul T Jamo.
Definition: linebreakdef.h:91
const char * lang
Language name.
Definition: linebreakdef.h:142
Emoji base.
Definition: linebreakdef.h:93
Hangul V Jamo.
Definition: linebreakdef.h:90
utf32_t(* get_next_char_t)(const void *, size_t, size_t *)
Abstract function interface for ub_get_next_char_utf8, ub_get_next_char_utf16, and ub_get_next_char_utf32.
Definition: unibreakdef.h:65
Zero-width space.
Definition: linebreakdef.h:84
Break (mandatory)
Definition: linebreakdef.h:103
enum LineBreakClass lbcLast
Breaking class of last codepoint.
Definition: linebreakdef.h:147
Prefix.
Definition: linebreakdef.h:73
Ambiguous quotation.
Definition: linebreakdef.h:67
Hyphen.
Definition: linebreakdef.h:80
Inseparable characters.
Definition: linebreakdef.h:79
Combining marks.
Definition: linebreakdef.h:85
Space.
Definition: linebreakdef.h:110
Hebrew letter.
Definition: linebreakdef.h:77
Symbols allowing break after.
Definition: linebreakdef.h:71
Header file for private definitions in the libunibreak library.
bool fLb21aHebrew
Flag for Hebrew letters (LB21a)
Definition: linebreakdef.h:150
const struct LineBreakPropertiesLang lb_prop_lang_map[]
Association data of language-specific line breaking properties with language names.
Definition: linebreakdef.c:117
Hangul L Jamo.
Definition: linebreakdef.h:89
Context representing internal state of the line breaking algorithm.
Definition: linebreakdef.h:140
Next line.
Definition: linebreakdef.h:107
Break before.
Definition: linebreakdef.h:82
Non-starters.
Definition: linebreakdef.h:69
Glue.
Definition: linebreakdef.h:68
int cLb30aRI
Count of RI characters (LB30a)
Definition: linebreakdef.h:151
Hangul LVT.
Definition: linebreakdef.h:88