edbee - Qt Editor Library
textcodecdetector.h
Go to the documentation of this file.
1 
6 #pragma once
7 
8 
9 class QByteArray;
10 
11 namespace edbee {
12 
13 class TextCodec;
14 
32 {
33 
34 public:
35 
37  static void setGlobalPreferedCodec( TextCodec* codec );
38 
39 
40  explicit TextCodecDetector( const QByteArray* buffer=0, TextCodec* preferedCodec=0 );
41  explicit TextCodecDetector( const char* buffer, int length=0, TextCodec* preferedCodec=0 );
42  virtual ~TextCodecDetector();
43 
44 
45  virtual TextCodec* detectCodec();
46 
48  virtual void setBuffer( const char* buf, int length ) // Sets the buffer reference and its length; both values are stored exactly as given.
49  {
50  bufferRef_ = buf; // only the pointer is stored here; no copy of the data is made
51  bufferLength_ = length;
52  }
53 
55  virtual const char*buffer() const { return bufferRef_; } // Returns the stored buffer pointer (bufferRef_).
56 
58  virtual int bufferLength() { return bufferLength_; } // Returns the stored buffer length (bufferLength_).
59 
60  virtual void setPreferedCodec( TextCodec* codec=0 );
61  virtual TextCodec* preferedCodec() { return preferedCodecRef_; } // Returns the stored preferred-codec pointer (preferedCodecRef_).
62 
63 
64  virtual void setFallbackCodec( TextCodec* codec=0 );
65  virtual TextCodec* fallbackCodec() const { return fallbackCodecRef_; } // Returns the stored fallback-codec pointer (fallbackCodecRef_).
66 
67 
68 
69 protected:
70 
72  virtual bool isContinuationChar( char b) { return ( static_cast<unsigned char>(b) & 0xC0 ) == 0x80; } // True for bytes of the form 10xxxxxx (0x80..0xBF), i.e. a continuation byte of a multi-byte character. Tested on the unsigned value so the result does not depend on whether plain char is signed.
73 
75  virtual bool isTwoBytesSequence(char b) { return ( static_cast<unsigned char>(b) & 0xE0 ) == 0xC0; } // True for bytes of the form 110xxxxx (0xC0..0xDF), i.e. the first byte of a two-byte sequence. Tested on the unsigned value so the result does not depend on whether plain char is signed.
76 
78  virtual bool isThreeBytesSequence(char b) { return ( static_cast<unsigned char>(b) & 0xF0 ) == 0xE0; } // True for bytes of the form 1110xxxx (0xE0..0xEF), i.e. the first byte of a three-byte sequence. Tested on the unsigned value so the result does not depend on whether plain char is signed.
79 
81  virtual bool isFourBytesSequence(char b) { return ( static_cast<unsigned char>(b) & 0xF8 ) == 0xF0; } // True for bytes of the form 11110xxx (0xF0..0xF7), i.e. the first byte of a four-byte sequence. Tested on the unsigned value so the result does not depend on whether plain char is signed.
82 
84  virtual bool isFiveBytesSequence(char b) { return ( static_cast<unsigned char>(b) & 0xFC ) == 0xF8; } // True for bytes of the form 111110xx (0xF8..0xFB), i.e. the first byte of a five-byte sequence. Tested on the unsigned value so the result does not depend on whether plain char is signed.
85 
86  // If the byte has the form 1111110x (0xFC..0xFD), then it's the first byte of a six-byte sequence character.
87  virtual bool isSixBytesSequence(char b){ return ( static_cast<unsigned char>(b) & 0xFE ) == 0xFC; } // Tested on the unsigned value so the result does not depend on whether plain char is signed.
88 
89 public:
90  static bool hasUTF8Bom( const char* buffer, int length );
91  static bool hasUTF16LEBom( const char* buffer, int length );
92  static bool hasUTF16BEBom( const char* buffer, int length );
93  static bool hasUTF32LEBom( const char* buffer, int length );
94  static bool hasUTF32BEBom( const char* buffer, int length );
95 
96 
97 private:
98 
99  //const QByteArray *bufferRef_; ///< A reference to the current buffer of data
100  const char* bufferRef_;
101  int bufferLength_;
102 
103  TextCodec* preferedCodecRef_;
104  TextCodec* fallbackCodecRef_;
105 
106 
107 };
108 
109 } // edbee
virtual void setBuffer(const char *buf, int length)
Sets the buffer reference.
Definition: textcodecdetector.h:48
virtual void setFallbackCodec(TextCodec *codec=0)
Sets the fallback text codec.
Definition: textcodecdetector.cpp:83
virtual void setPreferedCodec(TextCodec *codec=0)
Sets the preferred codec.
Definition: textcodecdetector.cpp:69
static bool hasUTF16BEBom(const char *buffer, int length)
Has a Byte Order Marker for UTF-16 Big Endian (utf-16 and ucs-2).
Definition: textcodecdetector.cpp:251
virtual bool isFourBytesSequence(char b)
If the byte has the form 11110xxx, then it's the first byte of a four-byte sequence character...
Definition: textcodecdetector.h:81
virtual bool isContinuationChar(char b)
If the byte has the form 10xxxxxx, then it's a continuation byte of a multi-byte character;...
Definition: textcodecdetector.h:72
virtual bool isThreeBytesSequence(char b)
If the byte has the form 1110xxxx, then it's the first byte of a three-byte sequence character...
Definition: textcodecdetector.h:78
virtual bool isFiveBytesSequence(char b)
If the byte has the form 111110xx, then it's the first byte of a five-byte sequence character...
Definition: textcodecdetector.h:84
static bool hasUTF32BEBom(const char *buffer, int length)
Has a Byte Order Marker for UTF-32 Big Endian.
Definition: textcodecdetector.cpp:267
TextCodecDetector(const QByteArray *buffer=0, TextCodec *preferedCodec=0)
Definition: textcodecdetector.cpp:43
virtual TextCodec * detectCodec()
Detects the encoding of the provided buffer. If Byte Order Markers are encountered at the beginning o...
Definition: textcodecdetector.cpp:122
This class represents a single text codec. The codec has a name and contains methods to create encoder...
Definition: textcodec.h:37
This class is used to detect the encoding of a given string. The detector is based on the Java code o...
Definition: textcodecdetector.h:31
virtual ~TextCodecDetector()
Definition: textcodecdetector.cpp:63
virtual const char * buffer() const
Returns the buffer reference.
Definition: textcodecdetector.h:55
Copyright 2011-2013 - Reliable Bits Software by Blommers IT.
Definition: commentcommand.cpp:22
virtual int bufferLength()
Returns the buffer length.
Definition: textcodecdetector.h:58
static bool hasUTF16LEBom(const char *buffer, int length)
Has a Byte Order Marker for UTF-16 Little Endian (ucs-2le, ucs-4le, and ucs-16le).
Definition: textcodecdetector.cpp:243
static bool hasUTF8Bom(const char *buffer, int length)
Has a Byte Order Marker for UTF-8.
Definition: textcodecdetector.cpp:235
virtual bool isSixBytesSequence(char b)
Definition: textcodecdetector.h:87
virtual TextCodec * fallbackCodec() const
Definition: textcodecdetector.h:65
virtual TextCodec * preferedCodec()
Definition: textcodecdetector.h:61
static bool hasUTF32LEBom(const char *buffer, int length)
Has a Byte Order Marker for UTF-32 Low Endian.
Definition: textcodecdetector.cpp:260
virtual bool isTwoBytesSequence(char b)
If the byte has the form 110xxxxx, then it's the first byte of a two-byte sequence character...
Definition: textcodecdetector.h:75
static void setGlobalPreferedCodec(TextCodec *codec)
Definition: textcodecdetector.cpp:36
static TextCodec * globalPreferedCodec()
Returns the static global preferred codec.
Definition: textcodecdetector.cpp:27