edbee - Qt Editor Library v0.11.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
textcodecdetector.h
Go to the documentation of this file.
1// edbee - Copyright (c) 2012-2025 by Rick Blommers and contributors
2// SPDX-License-Identifier: MIT
3
4#pragma once
5
6#include "edbee/exports.h"
7
8
9class QByteArray;
10
11namespace edbee {
12
13class TextCodec;
14
32
33public:
34
/// Static setter for the global preferred codec; implemented in textcodecdetector.cpp.
/// NOTE(review): presumably the value later returned by the static globalPreferedCodec() - confirm.
36 static void setGlobalPreferedCodec( TextCodec* codec );
37
38
/// Constructs a detector for the data in a QByteArray. Both arguments default to 0 (null).
/// The body lives in textcodecdetector.cpp.
39 explicit TextCodecDetector( const QByteArray* buffer=0, TextCodec* preferedCodec=0 );
/// Constructs a detector for a raw character buffer of the given length.
/// length and preferedCodec default to 0.
40 explicit TextCodecDetector( const char* buffer, int length=0, TextCodec* preferedCodec=0 );
/// Virtual destructor, so the class can be used as a polymorphic base.
41 virtual ~TextCodecDetector();
42
43
/// Detects the encoding of the current buffer and returns the matching codec.
/// The generated documentation mentions that byte order markers at the beginning of the buffer
/// are considered; the full algorithm is in textcodecdetector.cpp.
44 virtual TextCodec* detectCodec();
45
/// Sets the buffer reference: stores the given pointer and length as-is (nothing is copied here).
/// @param buf pointer to the data to inspect
/// @param length number of bytes available at buf
47 virtual void setBuffer( const char* buf, int length )
48 {
49 this->bufferLength_ = length;
50 this->bufferRef_ = buf;
51 }
52
/// Returns the buffer reference, i.e. the pointer stored in bufferRef_.
54 virtual const char*buffer() const { return bufferRef_; }
55
/// Returns the buffer length stored in bufferLength_.
/// NOTE(review): unlike buffer(), this getter is not const-qualified.
57 virtual int bufferLength() { return bufferLength_; }
58
/// Sets the preferred codec; the argument defaults to 0 (null). Implemented in the .cpp file.
59 virtual void setPreferedCodec( TextCodec* codec=0 );
/// Returns the preferred codec pointer held in preferedCodecRef_.
/// NOTE(review): not const-qualified, whereas fallbackCodec() is.
60 virtual TextCodec* preferedCodec() { return preferedCodecRef_; }
61
62
/// Sets the fallback codec; the argument defaults to 0 (null). Implemented in the .cpp file.
/// NOTE(review): presumably the codec used when detection finds nothing better - confirm in detectCodec().
63 virtual void setFallbackCodec( TextCodec* codec=0 );
/// Returns the fallback codec pointer held in fallbackCodecRef_.
64 virtual TextCodec* fallbackCodec() const { return fallbackCodecRef_; }
65
66
67
68protected:
69
/// Returns true if the byte has the form 10xxxxxx (0x80..0xBF), i.e. it is a continuation byte
/// of a multi-byte UTF-8 character.
/// The byte is tested as unsigned char: comparing a plain char with negative constants never
/// matches on platforms where char is unsigned.
71 virtual bool isContinuationChar( char b) { return ( static_cast<unsigned char>(b) & 0xC0 ) == 0x80; }
72
/// Returns true if the byte has the form 110xxxxx (0xC0..0xDF), i.e. it is the first byte of a
/// two-byte UTF-8 sequence.
/// The byte is tested as unsigned char: comparing a plain char with negative constants never
/// matches on platforms where char is unsigned.
74 virtual bool isTwoBytesSequence(char b) { return ( static_cast<unsigned char>(b) & 0xE0 ) == 0xC0; }
75
/// Returns true if the byte has the form 1110xxxx (0xE0..0xEF), i.e. it is the first byte of a
/// three-byte UTF-8 sequence.
/// The byte is tested as unsigned char: comparing a plain char with negative constants never
/// matches on platforms where char is unsigned.
77 virtual bool isThreeBytesSequence(char b) { return ( static_cast<unsigned char>(b) & 0xF0 ) == 0xE0; }
78
/// Returns true if the byte has the form 11110xxx (0xF0..0xF7), i.e. it is the first byte of a
/// four-byte UTF-8 sequence.
/// The byte is tested as unsigned char: comparing a plain char with negative constants never
/// matches on platforms where char is unsigned.
80 virtual bool isFourBytesSequence(char b) { return ( static_cast<unsigned char>(b) & 0xF8 ) == 0xF0; }
81
/// Returns true if the byte has the form 111110xx (0xF8..0xFB), i.e. it is the first byte of a
/// five-byte sequence (legacy UTF-8 form).
/// The byte is tested as unsigned char: comparing a plain char with negative constants never
/// matches on platforms where char is unsigned.
83 virtual bool isFiveBytesSequence(char b) { return ( static_cast<unsigned char>(b) & 0xFC ) == 0xF8; }
84
85 // If the byte has the form 1111110x (0xFC..0xFD), then it's the first byte of a six-byte sequence character (legacy UTF-8 form).
// The byte is tested as unsigned char: comparing a plain char with negative constants never
// matches on platforms where char is unsigned.
86 virtual bool isSixBytesSequence(char b){ return ( static_cast<unsigned char>(b) & 0xFE ) == 0xFC; }
87
88public:
// Static byte-order-mark helpers; they take a raw buffer plus its length and are usable without
// a detector instance.
// NOTE(review): judging by the names, each one presumably reports whether the buffer starts with
// the corresponding BOM - confirm against the definitions in textcodecdetector.cpp.
89 static bool hasUTF8Bom( const char* buffer, int length );
90 static bool hasUTF16LEBom( const char* buffer, int length );
91 static bool hasUTF16BEBom( const char* buffer, int length );
92 static bool hasUTF32LEBom( const char* buffer, int length );
93 static bool hasUTF32BEBom( const char* buffer, int length );
94
95
96private:
97
98 //const QByteArray *bufferRef_; ///< A reference to the current buffer of data
// Pointer to the data being inspected; assigned by setBuffer() and returned by buffer().
// NOTE(review): the "Ref" suffix suggests the pointer is not owned by this class - confirm.
99 const char* bufferRef_;
// Number of bytes at bufferRef_; assigned by setBuffer() and returned by bufferLength().
100 int bufferLength_;
101
// Codec returned by preferedCodec().
102 TextCodec* preferedCodecRef_;
// Codec returned by fallbackCodec().
103 TextCodec* fallbackCodecRef_;
104
105
106};
107
108} // edbee
TextCodecDetector(const QByteArray *buffer=0, TextCodec *preferedCodec=0)
Definition textcodecdetector.cpp:41
virtual bool isContinuationChar(char b)
If the byte has the form 10xxxxxx, then it's a continuation byte of a multi-byte character.
Definition textcodecdetector.h:71
virtual bool isFourBytesSequence(char b)
If the byte has the form 11110xxx, then it's the first byte of a four-byte sequence character.
Definition textcodecdetector.h:80
virtual TextCodec * fallbackCodec() const
Definition textcodecdetector.h:64
static TextCodec * globalPreferedCodec()
Returns the static global preferred codec.
Definition textcodecdetector.cpp:25
virtual TextCodec * preferedCodec()
Definition textcodecdetector.h:60
virtual bool isFiveBytesSequence(char b)
If the byte has the form 111110xx, then it's the first byte of a five-byte sequence character.
Definition textcodecdetector.h:83
virtual bool isSixBytesSequence(char b)
Definition textcodecdetector.h:86
virtual TextCodec * detectCodec()
Detects the encoding of the provided buffer. If Byte Order Markers are encountered at the beginning o...
Definition textcodecdetector.cpp:120
static void setGlobalPreferedCodec(TextCodec *codec)
Definition textcodecdetector.cpp:34
virtual bool isThreeBytesSequence(char b)
If the byte has the form 1110xxxx, then it's the first byte of a three-byte sequence character.
Definition textcodecdetector.h:77
virtual int bufferLength()
Returns the buffer length.
Definition textcodecdetector.h:57
virtual bool isTwoBytesSequence(char b)
If the byte has the form 110xxxxx, then it's the first byte of a two-byte sequence character.
Definition textcodecdetector.h:74
virtual const char * buffer() const
Returns the buffer reference.
Definition textcodecdetector.h:54
virtual void setBuffer(const char *buf, int length)
Sets the buffer reference.
Definition textcodecdetector.h:47
This class represents a single text codec. The codec has a name and contains methods to create encoder...
Definition textcodec.h:38
#define EDBEE_EXPORT
Definition exports.h:15
Qt Accessibility has an issue with reporting blank lines between element lines. Defining 'WINDOWS_EMP...
Definition commentcommand.cpp:20