// Rev 30 | Blame | Compare with Previous | Last modification | View Log | RSS feed
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

#include "TCompactProtocol.h"

#include <config.h>
#include <limits>

/*
 * TCompactProtocol::i*ToZigzag depend on the fact that the right shift
 * operator on a signed integer is an arithmetic (sign-extending) shift.
 * If this is not the case, the current implementation will not work.
 * If anyone encounters this error, we can try to figure out the best
 * way to implement an arithmetic right shift on their platform.
 */
#if !defined(SIGNED_RIGHT_SHIFT_IS) || !defined(ARITHMETIC_RIGHT_SHIFT)
# error "Unable to determine the behavior of a signed right shift"
#endif
#if SIGNED_RIGHT_SHIFT_IS != ARITHMETIC_RIGHT_SHIFT
# error "TCompactProtocol currenly only works if a signed right shift is arithmetic"
#endif

#ifdef __GNUC__
#define UNLIKELY(val) (__builtin_expect((val), 0))
#else
#define UNLIKELY(val) (val)
#endif

namespace apache { namespace thrift { namespace protocol {

/**
 * Lookup table indexed by TType value that yields the compact-protocol
 * type code. Slots marked "unused" have no corresponding TType.
 */
const int8_t TCompactProtocol::TTypeToCType[16] = {
  CT_STOP,          // T_STOP
  0,                // unused
  CT_BOOLEAN_TRUE,  // T_BOOL
  CT_BYTE,          // T_BYTE
  CT_DOUBLE,        // T_DOUBLE
  0,                // unused
  CT_I16,           // T_I16
  0,                // unused
  CT_I32,           // T_I32
  0,                // unused
  CT_I64,           // T_I64
  CT_BINARY,        // T_STRING
  CT_STRUCT,        // T_STRUCT
  CT_MAP,           // T_MAP
  CT_SET,           // T_SET
  CT_LIST,          // T_LIST
};

/**
 * Write a message header: the protocol id byte, one byte that packs the
 * protocol version together with the message type, the sequence id as a
 * varint, and finally the message name as a string.
 *
 * @return the number of bytes written.
 */
uint32_t TCompactProtocol::writeMessageBegin(const std::string& name,
                                             const TMessageType messageType,
                                             const int32_t seqid) {
  uint32_t wsize = 0;
  wsize += writeByte(PROTOCOL_ID);
  wsize += writeByte((VERSION_N & VERSION_MASK) |
                     (((int32_t)messageType << TYPE_SHIFT_AMOUNT) & TYPE_MASK));
  wsize += writeVarint32(seqid);
  wsize += writeString(name);
  return wsize;
}

/**
 * Write a field header containing the field id and field type. If the
 * difference between the current field id and the last one is small
 * (1 to 15 inclusive), then the field id will be encoded in the 4 MSB as a
 * delta. Otherwise, the field id will follow the type header as a zigzag
 * varint.
 *
 * Boolean fields are special: nothing is written here. The header details
 * are stashed in booleanField_ and emitted by writeBool(), because the
 * boolean value itself is carried in the type code.
 *
 * @return the number of bytes written (0 for a deferred boolean field).
 */
uint32_t TCompactProtocol::writeFieldBegin(const char* name,
                                           const TType fieldType,
                                           const int16_t fieldId) {
  if (fieldType == T_BOOL) {
    booleanField_.name = name;
    booleanField_.fieldType = fieldType;
    booleanField_.fieldId = fieldId;
  } else {
    return writeFieldBeginInternal(name, fieldType, fieldId, -1);
  }
  return 0;
}

/**
 * Write the STOP symbol so we know there are no more fields in this struct.
 *
 * @return the number of bytes written.
 */
uint32_t TCompactProtocol::writeFieldStop() {
  return writeByte(T_STOP);
}

/**
 * Write a struct begin. This doesn't actually put anything on the wire. We
 * use it as an opportunity to put special placeholder markers on the field
 * stack so we can get the field id deltas correct.
 *
 * @param name not used by this implementation.
 * @return always 0.
 */
uint32_t TCompactProtocol::writeStructBegin(const char* name) {
  lastField_.push(lastFieldId_);
  lastFieldId_ = 0;
  return 0;
}

/**
 * Write a struct end. This doesn't actually put anything on the wire. We use
 * this as an opportunity to pop the last field from the current struct off
 * of the field stack.
 *
 * @return always 0.
 */
uint32_t TCompactProtocol::writeStructEnd() {
  lastFieldId_ = lastField_.top();
  lastField_.pop();
  return 0;
}

/**
 * Write a List header.
 *
 * @return the number of bytes written.
 */
uint32_t TCompactProtocol::writeListBegin(const TType elemType,
                                          const uint32_t size) {
  return writeCollectionBegin(elemType, size);
}

/**
 * Write a set header.
 *
 * @return the number of bytes written.
 */
uint32_t TCompactProtocol::writeSetBegin(const TType elemType,
                                         const uint32_t size) {
  return writeCollectionBegin(elemType, size);
}

/**
 * Write a map header.
If the map is empty, omit the key and value type* headers, as we don't need any additional information to skip it.*/uint32_t TCompactProtocol::writeMapBegin(const TType keyType,const TType valType,const uint32_t size) {uint32_t wsize = 0;if (size == 0) {wsize += writeByte(0);} else {wsize += writeVarint32(size);wsize += writeByte(getCompactType(keyType) << 4 | getCompactType(valType));}return wsize;}/*** Write a boolean value. Potentially, this could be a boolean field, in* which case the field header info isn't written yet. If so, decide what the* right type header is for the value and then write the field header.* Otherwise, write a single byte.*/uint32_t TCompactProtocol::writeBool(const bool value) {uint32_t wsize = 0;if (booleanField_.name != NULL) {// we haven't written the field header yetwsize += writeFieldBeginInternal(booleanField_.name,booleanField_.fieldType,booleanField_.fieldId,value ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE);booleanField_.name = NULL;} else {// we're not part of a field, so just write the valuewsize += writeByte(value ? 
CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE);}return wsize;}uint32_t TCompactProtocol::writeByte(const int8_t byte) {trans_->write((uint8_t*)&byte, 1);return 1;}/*** Write an i16 as a zigzag varint.*/uint32_t TCompactProtocol::writeI16(const int16_t i16) {return writeVarint32(i32ToZigzag(i16));}/*** Write an i32 as a zigzag varint.*/uint32_t TCompactProtocol::writeI32(const int32_t i32) {return writeVarint32(i32ToZigzag(i32));}/*** Write an i64 as a zigzag varint.*/uint32_t TCompactProtocol::writeI64(const int64_t i64) {return writeVarint64(i64ToZigzag(i64));}/*** Write a double to the wire as 8 bytes.*/uint32_t TCompactProtocol::writeDouble(const double dub) {BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t));BOOST_STATIC_ASSERT(std::numeric_limits<double>::is_iec559);uint64_t bits = bitwise_cast<uint64_t>(dub);bits = htolell(bits);trans_->write((uint8_t*)&bits, 8);return 8;}/*** Write a string to the wire with a varint size preceeding.*/uint32_t TCompactProtocol::writeString(const std::string& str) {return writeBinary(str);}uint32_t TCompactProtocol::writeBinary(const std::string& str) {uint32_t ssize = str.size();uint32_t wsize = writeVarint32(ssize) + ssize;trans_->write((uint8_t*)str.data(), ssize);return wsize;}//// Internal Writing methods///*** The workhorse of writeFieldBegin. It has the option of doing a* 'type override' of the type header. This is used specifically in the* boolean field case.*/int32_t TCompactProtocol::writeFieldBeginInternal(const char* name,const TType fieldType,const int16_t fieldId,int8_t typeOverride) {uint32_t wsize = 0;// if there's a type override, use that.int8_t typeToWrite = (typeOverride == -1 ? 
getCompactType(fieldType) : typeOverride);// check if we can use delta encoding for the field idif (fieldId > lastFieldId_ && fieldId - lastFieldId_ <= 15) {// write them togetherwsize += writeByte((fieldId - lastFieldId_) << 4 | typeToWrite);} else {// write them separatewsize += writeByte(typeToWrite);wsize += writeI16(fieldId);}lastFieldId_ = fieldId;return wsize;}/*** Abstract method for writing the start of lists and sets. List and sets on* the wire differ only by the type indicator.*/uint32_t TCompactProtocol::writeCollectionBegin(int8_t elemType, int32_t size) {uint32_t wsize = 0;if (size <= 14) {wsize += writeByte(size << 4 | getCompactType(elemType));} else {wsize += writeByte(0xf0 | getCompactType(elemType));wsize += writeVarint32(size);}return wsize;}/*** Write an i32 as a varint. Results in 1-5 bytes on the wire.*/uint32_t TCompactProtocol::writeVarint32(uint32_t n) {uint8_t buf[5];uint32_t wsize = 0;while (true) {if ((n & ~0x7F) == 0) {buf[wsize++] = (int8_t)n;break;} else {buf[wsize++] = (int8_t)((n & 0x7F) | 0x80);n >>= 7;}}trans_->write(buf, wsize);return wsize;}/*** Write an i64 as a varint. Results in 1-10 bytes on the wire.*/uint32_t TCompactProtocol::writeVarint64(uint64_t n) {uint8_t buf[10];uint32_t wsize = 0;while (true) {if ((n & ~0x7FL) == 0) {buf[wsize++] = (int8_t)n;break;} else {buf[wsize++] = (int8_t)((n & 0x7F) | 0x80);n >>= 7;}}trans_->write(buf, wsize);return wsize;}/*** Convert l into a zigzag long. This allows negative numbers to be* represented compactly as a varint.*/uint64_t TCompactProtocol::i64ToZigzag(const int64_t l) {return (l << 1) ^ (l >> 63);}/*** Convert n into a zigzag int. 
This allows negative numbers to be* represented compactly as a varint.*/uint32_t TCompactProtocol::i32ToZigzag(const int32_t n) {return (n << 1) ^ (n >> 31);}/*** Given a TType value, find the appropriate TCompactProtocol.Type value*/int8_t TCompactProtocol::getCompactType(int8_t ttype) {return TTypeToCType[ttype];}//// Reading Methods///*** Read a message header.*/uint32_t TCompactProtocol::readMessageBegin(std::string& name,TMessageType& messageType,int32_t& seqid) {uint32_t rsize = 0;int8_t protocolId;int8_t versionAndType;int8_t version;rsize += readByte(protocolId);if (protocolId != PROTOCOL_ID) {throw TProtocolException(TProtocolException::BAD_VERSION, "Bad protocol identifier");}rsize += readByte(versionAndType);version = (int8_t)(versionAndType & VERSION_MASK);if (version != VERSION_N) {throw TProtocolException(TProtocolException::BAD_VERSION, "Bad protocol version");}messageType = (TMessageType)((versionAndType >> TYPE_SHIFT_AMOUNT) & 0x03);rsize += readVarint32(seqid);rsize += readString(name);return rsize;}/*** Read a struct begin. There's nothing on the wire for this, but it is our* opportunity to push a new struct begin marker on the field stack.*/uint32_t TCompactProtocol::readStructBegin(std::string& name) {name = "";lastField_.push(lastFieldId_);lastFieldId_ = 0;return 0;}/*** Doesn't actually consume any wire data, just removes the last field for* this struct from the field stack.*/uint32_t TCompactProtocol::readStructEnd() {lastFieldId_ = lastField_.top();lastField_.pop();return 0;}/*** Read a field header off the wire.*/uint32_t TCompactProtocol::readFieldBegin(std::string& name,TType& fieldType,int16_t& fieldId) {uint32_t rsize = 0;int8_t byte;int8_t type;rsize += readByte(byte);type = (byte & 0x0f);// if it's a stop, then we can return immediately, as the struct is over.if (type == T_STOP) {fieldType = T_STOP;fieldId = 0;return rsize;}// mask off the 4 MSB of the type header. 
it could contain a field id delta.int16_t modifier = (int16_t)(((uint8_t)byte & 0xf0) >> 4);if (modifier == 0) {// not a delta, look ahead for the zigzag varint field id.rsize += readI16(fieldId);} else {fieldId = (int16_t)(lastFieldId_ + modifier);}fieldType = getTType(type);// if this happens to be a boolean field, the value is encoded in the typeif (type == CT_BOOLEAN_TRUE || type == CT_BOOLEAN_FALSE) {// save the boolean value in a special instance variable.boolValue_.hasBoolValue = true;boolValue_.boolValue = (type == CT_BOOLEAN_TRUE ? true : false);}// push the new field onto the field stack so we can keep the deltas going.lastFieldId_ = fieldId;return rsize;}/*** Read a map header off the wire. If the size is zero, skip reading the key* and value type. This means that 0-length maps will yield TMaps without the* "correct" types.*/uint32_t TCompactProtocol::readMapBegin(TType& keyType,TType& valType,uint32_t& size) {uint32_t rsize = 0;int8_t kvType = 0;int32_t msize = 0;rsize += readVarint32(msize);if (msize != 0)rsize += readByte(kvType);if (msize < 0) {throw TProtocolException(TProtocolException::NEGATIVE_SIZE);} else if (container_limit_ && msize > container_limit_) {throw TProtocolException(TProtocolException::SIZE_LIMIT);}keyType = getTType((int8_t)((uint8_t)kvType >> 4));valType = getTType((int8_t)((uint8_t)kvType & 0xf));size = (uint32_t)msize;return rsize;}/*** Read a list header off the wire. If the list size is 0-14, the size will* be packed into the element type header. 
If it's a longer list, the 4 MSB* of the element type header will be 0xF, and a varint will follow with the* true size.*/uint32_t TCompactProtocol::readListBegin(TType& elemType,uint32_t& size) {int8_t size_and_type;uint32_t rsize = 0;int32_t lsize;rsize += readByte(size_and_type);lsize = ((uint8_t)size_and_type >> 4) & 0x0f;if (lsize == 15) {rsize += readVarint32(lsize);}if (lsize < 0) {throw TProtocolException(TProtocolException::NEGATIVE_SIZE);} else if (container_limit_ && lsize > container_limit_) {throw TProtocolException(TProtocolException::SIZE_LIMIT);}elemType = getTType((int8_t)(size_and_type & 0x0f));size = (uint32_t)lsize;return rsize;}/*** Read a set header off the wire. If the set size is 0-14, the size will* be packed into the element type header. If it's a longer set, the 4 MSB* of the element type header will be 0xF, and a varint will follow with the* true size.*/uint32_t TCompactProtocol::readSetBegin(TType& elemType,uint32_t& size) {return readListBegin(elemType, size);}/*** Read a boolean off the wire. If this is a boolean field, the value should* already have been read during readFieldBegin, so we'll just consume the* pre-stored value. Otherwise, read a byte.*/uint32_t TCompactProtocol::readBool(bool& value) {if (boolValue_.hasBoolValue == true) {value = boolValue_.boolValue;boolValue_.hasBoolValue = false;return 0;} else {int8_t val;readByte(val);value = (val == CT_BOOLEAN_TRUE);return 1;}}/*** Read a single byte off the wire. 
Nothing interesting here.*/uint32_t TCompactProtocol::readByte(int8_t& byte) {uint8_t b[1];trans_->readAll(b, 1);byte = *(int8_t*)b;return 1;}/*** Read an i16 from the wire as a zigzag varint.*/uint32_t TCompactProtocol::readI16(int16_t& i16) {int32_t value;uint32_t rsize = readVarint32(value);i16 = (int16_t)zigzagToI32(value);return rsize;}/*** Read an i32 from the wire as a zigzag varint.*/uint32_t TCompactProtocol::readI32(int32_t& i32) {int32_t value;uint32_t rsize = readVarint32(value);i32 = zigzagToI32(value);return rsize;}/*** Read an i64 from the wire as a zigzag varint.*/uint32_t TCompactProtocol::readI64(int64_t& i64) {int64_t value;uint32_t rsize = readVarint64(value);i64 = zigzagToI64(value);return rsize;}/*** No magic here - just read a double off the wire.*/uint32_t TCompactProtocol::readDouble(double& dub) {BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t));BOOST_STATIC_ASSERT(std::numeric_limits<double>::is_iec559);uint64_t bits;uint8_t b[8];trans_->readAll(b, 8);bits = *(uint64_t*)b;bits = letohll(bits);dub = bitwise_cast<double>(bits);return 8;}uint32_t TCompactProtocol::readString(std::string& str) {return readBinary(str);}/*** Read a byte[] from the wire.*/uint32_t TCompactProtocol::readBinary(std::string& str) {int32_t rsize = 0;int32_t size;rsize += readVarint32(size);// Catch empty string caseif (size == 0) {str = "";return rsize;}// Catch error casesif (size < 0) {throw TProtocolException(TProtocolException::NEGATIVE_SIZE);}if (string_limit_ > 0 && size > string_limit_) {throw TProtocolException(TProtocolException::SIZE_LIMIT);}// Use the heap here to prevent stack overflow for v. 
large stringsif (size > string_buf_size_ || string_buf_ == NULL) {void* new_string_buf = std::realloc(string_buf_, (uint32_t)size);if (new_string_buf == NULL) {throw TProtocolException(TProtocolException::UNKNOWN, "Out of memory in TCompactProtocol::readString");}string_buf_ = (uint8_t*)new_string_buf;string_buf_size_ = size;}trans_->readAll(string_buf_, size);str.assign((char*)string_buf_, size);return rsize + (uint32_t)size;}/*** Read an i32 from the wire as a varint. The MSB of each byte is set* if there is another byte to follow. This can read up to 5 bytes.*/uint32_t TCompactProtocol::readVarint32(int32_t& i32) {int64_t val;uint32_t rsize = readVarint64(val);i32 = (int32_t)val;return rsize;}/*** Read an i64 from the wire as a proper varint. The MSB of each byte is set* if there is another byte to follow. This can read up to 10 bytes.*/uint32_t TCompactProtocol::readVarint64(int64_t& i64) {uint32_t rsize = 0;uint64_t val = 0;int shift = 0;uint8_t buf[10]; // 64 bits / (7 bits/byte) = 10 bytes.uint32_t buf_size = sizeof(buf);const uint8_t* borrowed = trans_->borrow(buf, &buf_size);// Fast path.if (borrowed != NULL) {while (true) {uint8_t byte = borrowed[rsize];rsize++;val |= (uint64_t)(byte & 0x7f) << shift;shift += 7;if (!(byte & 0x80)) {i64 = val;trans_->consume(rsize);return rsize;}// Have to check for invalid data so we don't crash.if (UNLIKELY(rsize == sizeof(buf))) {throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");}}}// Slow path.else {while (true) {uint8_t byte;rsize += trans_->readAll(&byte, 1);val |= (uint64_t)(byte & 0x7f) << shift;shift += 7;if (!(byte & 0x80)) {i64 = val;return rsize;}// Might as well check for invalid data on the slow path too.if (UNLIKELY(rsize >= sizeof(buf))) {throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");}}}}/*** Convert from zigzag int to int.*/int32_t TCompactProtocol::zigzagToI32(uint32_t n) {return (n >> 1) ^ -(n & 1);}/*** 
Convert from zigzag long to long.*/int64_t TCompactProtocol::zigzagToI64(uint64_t n) {return (n >> 1) ^ -(n & 1);}TType TCompactProtocol::getTType(int8_t type) {switch (type) {case T_STOP:return T_STOP;case CT_BOOLEAN_FALSE:case CT_BOOLEAN_TRUE:return T_BOOL;case CT_BYTE:return T_BYTE;case CT_I16:return T_I16;case CT_I32:return T_I32;case CT_I64:return T_I64;case CT_DOUBLE:return T_DOUBLE;case CT_BINARY:return T_STRING;case CT_LIST:return T_LIST;case CT_SET:return T_SET;case CT_MAP:return T_MAP;case CT_STRUCT:return T_STRUCT;default:throw TException("don't know what type: " + type);}return T_STOP;}}}} // apache::thrift::protocol