summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJörg Frings-Fürst <debian@jff-webhosting.net>2016-09-11 19:11:38 +0200
committerJörg Frings-Fürst <debian@jff-webhosting.net>2016-09-11 19:11:38 +0200
commite568e8ecacb8509ae8fae067d05ad1e1fb78ec96 (patch)
treec68d5c3f51c770c1c244f63c4323334fb6474da1
parent1e1f807920bbdbf47c8ae1b913540c01cef425e6 (diff)
New upstream version 0.0+git20160911~9bf299cupstream/0.0+git20160911_9bf299c
-rw-r--r--.gitignore3
-rw-r--r--csv.h2251
-rw-r--r--debian/README.source12
-rw-r--r--debian/changelog20
-rw-r--r--debian/compat1
-rw-r--r--debian/control31
-rw-r--r--debian/copyright58
-rw-r--r--debian/doc-base13
-rw-r--r--debian/doc/Documentation.html89
-rw-r--r--debian/doc/Documentation.pdfbin37896 -> 0 bytes
-rwxr-xr-xdebian/doc/convert.sh3
-rw-r--r--debian/docs2
-rw-r--r--debian/install1
-rwxr-xr-xdebian/rules18
-rw-r--r--debian/source/format1
-rw-r--r--debian/source/include-binaries1
-rw-r--r--debian/watch7
17 files changed, 1220 insertions, 1291 deletions
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 6c270cd..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-.bzr
-.bzrignore
-.pc \ No newline at end of file
diff --git a/csv.h b/csv.h
index 4ff5a53..4d59618 100644
--- a/csv.h
+++ b/csv.h
@@ -1,32 +1,32 @@
-// Copyright: (2012-2014) Ben Strasser <code@ben-strasser.net>
+// Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
// License: BSD-3
//
// All rights reserved.
//
-// Redistribution and use in source and binary forms, with or without
+// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
-// 1. Redistributions of source code must retain the above copyright notice,
+// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
-//2. Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
+//2. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
//3. Neither the name of the copyright holder nor the names of its contributors
-// may be used to endorse or promote products derived from this software
+// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
#ifndef CSV_H
@@ -40,1030 +40,1219 @@
#include <cstdio>
#include <exception>
#ifndef CSV_IO_NO_THREAD
-#include <future>
+#include <mutex>
+#include <thread>
+#include <condition_variable>
#endif
+#include <memory>
#include <cassert>
#include <cerrno>
+#include <istream>
namespace io{
- ////////////////////////////////////////////////////////////////////////////
- // LineReader //
- ////////////////////////////////////////////////////////////////////////////
-
- namespace error{
- struct base : std::exception{
- virtual void format_error_message()const = 0;
-
- const char*what()const throw(){
- format_error_message();
- return error_message_buffer;
- }
-
- mutable char error_message_buffer[256];
- };
-
- const int max_file_name_length = 255;
-
- struct with_file_name{
- with_file_name(){
- std::memset(file_name, 0, max_file_name_length+1);
- }
-
- void set_file_name(const char*file_name){
- std::strncpy(this->file_name, file_name, max_file_name_length);
- this->file_name[max_file_name_length] = '\0';
- }
-
- char file_name[max_file_name_length+1];
- };
-
- struct with_file_line{
- with_file_line(){
- file_line = -1;
- }
-
- void set_file_line(int file_line){
- this->file_line = file_line;
- }
-
- int file_line;
- };
-
- struct with_errno{
- with_errno(){
- errno = 0;
- }
-
- void set_errno(int errno_value){
- this->errno_value = errno_value;
- }
-
- int errno_value;
- };
-
- struct can_not_open_file :
- base,
- with_file_name,
- with_errno{
- void format_error_message()const{
- if(errno_value != 0)
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "Can not open file \"%s\" because \"%s\"."
- , file_name, std::strerror(errno_value));
- else
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "Can not open file \"%s\"."
- , file_name);
- }
- };
-
- struct line_length_limit_exceeded :
- base,
- with_file_name,
- with_file_line{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1."
- , file_line, file_name);
- }
- };
- }
-
- class LineReader{
- private:
- static const int block_len = 1<<24;
- #ifndef CSV_IO_NO_THREAD
- std::future<int>bytes_read;
- #endif
- FILE*file;
- char*buffer;
- int data_begin;
- int data_end;
-
- char file_name[error::max_file_name_length+1];
- unsigned file_line;
-
- void open_file(const char*file_name){
- // We open the file in binary mode as it makes no difference under *nix
- // and under Windows we handle \r\n newlines ourself.
- file = std::fopen(file_name, "rb");
- if(file == 0){
- int x = errno; // store errno as soon as possible, doing it after constructor call can fail.
- error::can_not_open_file err;
- err.set_errno(x);
- err.set_file_name(file_name);
- throw err;
- }
- }
-
- void init(){
- file_line = 0;
-
- // Tell the std library that we want to do the buffering ourself.
- std::setvbuf(file, 0, _IONBF, 0);
-
- try{
- buffer = new char[3*block_len];
- }catch(...){
- std::fclose(file);
- throw;
- }
-
- data_begin = 0;
- data_end = std::fread(buffer, 1, 2*block_len, file);
-
- // Ignore UTF-8 BOM
- if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
- data_begin = 3;
-
- #ifndef CSV_IO_NO_THREAD
- if(data_end == 2*block_len){
- bytes_read = std::async(std::launch::async, [=]()->int{
- return std::fread(buffer + 2*block_len, 1, block_len, file);
- });
- }
- #endif
- }
-
- public:
- LineReader() = delete;
- LineReader(const LineReader&) = delete;
- LineReader&operator=(const LineReader&) = delete;
-
- LineReader(const char*file_name, FILE*file):
- file(file){
- set_file_name(file_name);
- init();
- }
-
- LineReader(const std::string&file_name, FILE*file):
- file(file){
- set_file_name(file_name.c_str());
- init();
- }
-
- explicit LineReader(const char*file_name){
- set_file_name(file_name);
- open_file(file_name);
- init();
- }
-
- explicit LineReader(const std::string&file_name){
- set_file_name(file_name.c_str());
- open_file(file_name.c_str());
- init();
- }
-
- void set_file_name(const std::string&file_name){
- set_file_name(file_name.c_str());
- }
-
- void set_file_name(const char*file_name){
- strncpy(this->file_name, file_name, error::max_file_name_length);
- this->file_name[error::max_file_name_length] = '\0';
- }
-
- const char*get_truncated_file_name()const{
- return file_name;
- }
-
- void set_file_line(unsigned file_line){
- this->file_line = file_line;
- }
-
- unsigned get_file_line()const{
- return file_line;
- }
+ ////////////////////////////////////////////////////////////////////////////
+ // LineReader //
+ ////////////////////////////////////////////////////////////////////////////
+
+ // Exception types thrown by the line reader, plus small mix-ins that carry
+ // the context (file name, line number, errno) used in the messages.
+ namespace error{
+     // Common base of all exceptions in this header. The message text is
+     // produced lazily: what() asks the derived class to format it into a
+     // fixed-size buffer stored inside the exception object.
+     struct base : std::exception{
+         // Writes the message for this error into error_message_buffer.
+         virtual void format_error_message()const = 0;
+
+         // noexcept replaces the deprecated dynamic exception specification
+         // throw(); the two are equivalent for callers.
+         const char*what()const noexcept override{
+             format_error_message();
+             return error_message_buffer;
+         }
+
+         // mutable because what() is const but has to fill the buffer.
+         mutable char error_message_buffer[256];
+     };
+
+     // Longest file name kept in an error; longer names are truncated.
+     const int max_file_name_length = 255;
+
+     // Mix-in storing a (possibly truncated) copy of the file name.
+     struct with_file_name{
+         with_file_name(){
+             std::memset(file_name, 0, max_file_name_length+1);
+         }
+
+         void set_file_name(const char*file_name){
+             std::strncpy(this->file_name, file_name, max_file_name_length);
+             // strncpy does not terminate the copy when the source is too long.
+             this->file_name[max_file_name_length] = '\0';
+         }
+
+         char file_name[max_file_name_length+1];
+     };
+
+     // Mix-in storing a line number; -1 until set_file_line() is called.
+     struct with_file_line{
+         with_file_line(){
+             file_line = -1;
+         }
+
+         void set_file_line(int file_line){
+             this->file_line = file_line;
+         }
+
+         int file_line;
+     };
+
+     // Mix-in storing an errno value; 0 until set_errno() is called.
+     struct with_errno{
+         with_errno(){
+             errno_value = 0;
+         }
+
+         void set_errno(int errno_value){
+             this->errno_value = errno_value;
+         }
+
+         int errno_value;
+     };
+
+     // Thrown when a file cannot be opened. The reason reported by
+     // strerror() is appended only when an errno value was recorded.
+     struct can_not_open_file :
+         base,
+         with_file_name,
+         with_errno{
+         void format_error_message()const override{
+             if(errno_value != 0)
+                 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
+                     "Can not open file \"%s\" because \"%s\"."
+                     , file_name, std::strerror(errno_value));
+             else
+                 std::snprintf(error_message_buffer, sizeof(error_message_buffer),
+                     "Can not open file \"%s\"."
+                     , file_name);
+         }
+     };
+
+     // Thrown when a single line does not fit into one read block.
+     struct line_length_limit_exceeded :
+         base,
+         with_file_name,
+         with_file_line{
+         void format_error_message()const override{
+             std::snprintf(error_message_buffer, sizeof(error_message_buffer),
+                 "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1."
+                 , file_line, file_name);
+         }
+     };
+ }
+
+ // Interface for anything that can hand out raw bytes to the line reader.
+ class ByteSourceBase{
+ public:
+     // Stores up to `size` bytes in `buffer` and returns how many were
+     // stored. The readers in this file treat a return value of 0 as the
+     // end of the input.
+     virtual int read(char*buffer, int size) = 0;
+
+     // Sources are destroyed through a pointer to this base class.
+     virtual ~ByteSourceBase() = default;
+ };
+
+ namespace detail{
+
+ // Byte source backed by a C stdio stream. The object takes ownership of
+ // the FILE* and closes it in its destructor.
+ class OwningStdIOByteSourceBase : public ByteSourceBase{
+ public:
+     explicit OwningStdIOByteSourceBase(FILE*file):file(file){
+         // Tell the std library that we want to do the buffering ourselves.
+         std::setvbuf(file, 0, _IONBF, 0);
+     }
+
+     // Copying would leave two owners that both fclose() the same stream.
+     OwningStdIOByteSourceBase(const OwningStdIOByteSourceBase&) = delete;
+     OwningStdIOByteSourceBase&operator=(const OwningStdIOByteSourceBase&) = delete;
+
+     // Returns the number of bytes fread() delivered. The result never
+     // exceeds `size`, so narrowing it back to int is lossless.
+     int read(char*buffer, int size){
+         return static_cast<int>(std::fread(buffer, 1, size, file));
+     }
+
+     ~OwningStdIOByteSourceBase(){
+         std::fclose(file);
+     }
+
+ private:
+     FILE*file;
+ };
+
+ // Byte source that pulls from a std::istream. Only a reference is kept,
+ // so the stream has to outlive this object.
+ class NonOwningIStreamByteSource : public ByteSourceBase{
+ public:
+     explicit NonOwningIStreamByteSource(std::istream&in):stream(in){}
+
+     ~NonOwningIStreamByteSource(){}
+
+     // Performs an unformatted read and reports how many characters the
+     // stream actually extracted.
+     int read(char*buffer, int size){
+         return stream.read(buffer, size).gcount();
+     }
+
+ private:
+     std::istream&stream;
+ };
+
+ // Byte source that walks over a caller-provided memory range. The bytes
+ // are not copied on construction, so the range must stay valid.
+ class NonOwningStringByteSource : public ByteSourceBase{
+ public:
+     NonOwningStringByteSource(const char*str, long long size):str(str), remaining_byte_count(size){}
+
+     ~NonOwningStringByteSource(){}
+
+     // Copies min(desired_byte_count, bytes left) bytes and advances the
+     // read position by that amount.
+     int read(char*buffer, int desired_byte_count){
+         const int chunk = remaining_byte_count < desired_byte_count
+             ? static_cast<int>(remaining_byte_count)
+             : desired_byte_count;
+         std::memcpy(buffer, str, chunk);
+         str += chunk;
+         remaining_byte_count -= chunk;
+         return chunk;
+     }
+
+ private:
+     const char*str;
+     long long remaining_byte_count;
+ };
+
+ #ifndef CSV_IO_NO_THREAD
+ class AsynchronousReader{ // Runs ByteSourceBase::read() on a worker thread; a request is posted with start_read() and collected with finish_read().
+ public:
+ void init(std::unique_ptr<ByteSourceBase>arg_byte_source){ // Takes ownership of the byte source and launches the worker thread.
+ std::unique_lock<std::mutex>guard(lock);
+ byte_source = std::move(arg_byte_source);
+ desired_byte_count = -1; // -1 means "no read request pending"
+ termination_requested = false;
+ worker = std::thread(
+ [&]{
+ std::unique_lock<std::mutex>guard(lock); // the worker owns the mutex except while it is blocked in wait()
+ try{
+ for(;;){
+ read_requested_condition.wait( // sleep until start_read() posts a request or the destructor asks to stop
+ guard,
+ [&]{
+ return desired_byte_count != -1 || termination_requested;
+ }
+ );
+ if(termination_requested)
+ return;
+ // the read below runs with the mutex held
+ read_byte_count = byte_source->read(buffer, desired_byte_count);
+ desired_byte_count = -1; // request consumed
+ if(read_byte_count == 0)
+ break; // a 0-byte read ends the worker; the notify after the try block wakes the waiter
+ read_finished_condition.notify_one();
+ }
+ }catch(...){
+ read_error = std::current_exception(); // handed to the caller by finish_read()
+ }
+ read_finished_condition.notify_one();
+ }
+ );
+ }
+
+ bool is_valid()const{ // true once init() has stored a byte source
+ return byte_source != 0;
+ }
+
+ void start_read(char*arg_buffer, int arg_desired_byte_count){ // Posts a read request and wakes the worker; returns without waiting for the read.
+ std::unique_lock<std::mutex>guard(lock);
+ buffer = arg_buffer;
+ desired_byte_count = arg_desired_byte_count;
+ read_byte_count = -1; // -1 means "result not available yet"
+ read_requested_condition.notify_one();
+ }
+ // NOTE(review): once the worker has left its loop after a 0-byte read, a later start_read()/finish_read() pair looks like it would wait forever - confirm callers never do that.
+ int finish_read(){ // Blocks until the pending read has a result, then returns the byte count or rethrows the worker's exception.
+ std::unique_lock<std::mutex>guard(lock);
+ read_finished_condition.wait(
+ guard,
+ [&]{
+ return read_byte_count != -1 || read_error;
+ }
+ );
+ if(read_error)
+ std::rethrow_exception(read_error);
+ else
+ return read_byte_count;
+ }
+
+ ~AsynchronousReader(){ // Stops and joins the worker, but only if init() ever started one.
+ if(byte_source != 0){
+ { // inner scope: release the mutex before notifying and joining
+ std::unique_lock<std::mutex>guard(lock);
+ termination_requested = true;
+ }
+ read_requested_condition.notify_one();
+ worker.join();
+ }
+ }
+
+ private:
+ std::unique_ptr<ByteSourceBase>byte_source;
+
+ std::thread worker;
+ // the members below are assigned by init()/start_read(), not by a constructor
+ bool termination_requested;
+ std::exception_ptr read_error;
+ char*buffer; // destination of the pending read; memory is owned by the caller
+ int desired_byte_count;
+ int read_byte_count;
+
+ std::mutex lock; // guards every member above that both threads touch
+ std::condition_variable read_finished_condition;
+ std::condition_variable read_requested_condition;
+ };
+ #endif
+
+ // Single-threaded stand-in with the same interface as AsynchronousReader:
+ // start_read() only records the request, finish_read() executes it.
+ class SynchronousReader{
+ public:
+     // Takes ownership of the byte source.
+     void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
+         byte_source = std::move(arg_byte_source);
+     }
+
+     // True once init() has stored a byte source.
+     bool is_valid()const{
+         return byte_source != nullptr;
+     }
+
+     // Remembers where the next read should go and how large it may be.
+     void start_read(char*arg_buffer, int arg_desired_byte_count){
+         desired_byte_count = arg_desired_byte_count;
+         buffer = arg_buffer;
+     }
+
+     // Performs the remembered read and returns the number of bytes read.
+     int finish_read(){
+         return byte_source->read(buffer, desired_byte_count);
+     }
+ private:
+     std::unique_ptr<ByteSourceBase>byte_source;
+     char*buffer;
+     int desired_byte_count;
+ };
+ }
+
+ // Splits a byte source into lines. The data is held in one buffer of three
+ // blocks of block_len bytes: the first two blocks contain the bytes that
+ // are currently parsed, the third block receives the next chunk while
+ // parsing goes on. Lines are terminated in place, so next_line() returns
+ // pointers into that buffer.
+ class LineReader{
+ private:
+     static const int block_len = 1<<24;
+     // Declared before `reader` on purpose: members are destroyed in reverse
+     // declaration order, so the worker thread owned by `reader` is joined
+     // before the memory it may still be writing to is released. This also
+     // covers a constructor that exits with an exception.
+     std::unique_ptr<char[]>buffer;
+     #ifdef CSV_IO_NO_THREAD
+     detail::SynchronousReader reader;
+     #else
+     detail::AsynchronousReader reader;
+     #endif
+     int data_begin;
+     int data_end;
+
+     char file_name[error::max_file_name_length+1];
+     unsigned file_line;
+
+     // Opens file_name and wraps it in an owning byte source.
+     // Throws error::can_not_open_file when fopen() fails.
+     static std::unique_ptr<ByteSourceBase> open_file(const char*file_name){
+         // We open the file in binary mode as it makes no difference under *nix
+         // and under Windows we handle \r\n newlines ourselves.
+         FILE*file = std::fopen(file_name, "rb");
+         if(file == 0){
+             int x = errno; // store errno as soon as possible; constructing the exception first could overwrite it.
+             error::can_not_open_file err;
+             err.set_errno(x);
+             err.set_file_name(file_name);
+             throw err;
+         }
+         return std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file));
+     }
+
+     // Allocates the buffer, reads the first two blocks and, when both were
+     // filled completely, hands the source to `reader` and requests the
+     // third block. Otherwise the whole input is already in memory and the
+     // source is released when this function returns.
+     void init(std::unique_ptr<ByteSourceBase>byte_source){
+         file_line = 0;
+
+         buffer = std::unique_ptr<char[]>(new char[3*block_len]);
+         data_begin = 0;
+         data_end = byte_source->read(buffer.get(), 2*block_len);
+
+         // Ignore UTF-8 BOM
+         if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
+             data_begin = 3;
+
+         if(data_end == 2*block_len){
+             reader.init(std::move(byte_source));
+             reader.start_read(buffer.get() + 2*block_len, block_len);
+         }
+     }
+
+ public:
+     LineReader() = delete;
+     LineReader(const LineReader&) = delete;
+     LineReader&operator=(const LineReader&) = delete;
+
+     // Read from the file called file_name.
+     explicit LineReader(const char*file_name){
+         set_file_name(file_name);
+         init(open_file(file_name));
+     }
+
+     explicit LineReader(const std::string&file_name){
+         set_file_name(file_name.c_str());
+         init(open_file(file_name.c_str()));
+     }
+
+     // Read from a caller-supplied byte source; file_name is only used in
+     // error messages.
+     LineReader(const char*file_name, std::unique_ptr<ByteSourceBase>byte_source){
+         set_file_name(file_name);
+         init(std::move(byte_source));
+     }
+
+     LineReader(const std::string&file_name, std::unique_ptr<ByteSourceBase>byte_source){
+         set_file_name(file_name.c_str());
+         init(std::move(byte_source));
+     }
+
+     // Read from the memory range [data_begin, data_end).
+     LineReader(const char*file_name, const char*data_begin, const char*data_end){
+         set_file_name(file_name);
+         init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
+     }
+
+     LineReader(const std::string&file_name, const char*data_begin, const char*data_end){
+         set_file_name(file_name.c_str());
+         init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
+     }
+
+     // Read from an already opened stdio stream. The stream is closed when
+     // its byte source is destroyed.
+     LineReader(const char*file_name, FILE*file){
+         set_file_name(file_name);
+         init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
+     }
+
+     LineReader(const std::string&file_name, FILE*file){
+         set_file_name(file_name.c_str());
+         init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
+     }
+
+     // Read from a std::istream, which is referenced but not owned.
+     LineReader(const char*file_name, std::istream&in){
+         set_file_name(file_name);
+         init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
+     }
+
+     LineReader(const std::string&file_name, std::istream&in){
+         set_file_name(file_name.c_str());
+         init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
+     }
+
+     void set_file_name(const std::string&file_name){
+         set_file_name(file_name.c_str());
+     }
+
+     // Stores the name used in error messages, truncated to
+     // error::max_file_name_length characters.
+     void set_file_name(const char*file_name){
+         std::strncpy(this->file_name, file_name, error::max_file_name_length);
+         this->file_name[error::max_file_name_length] = '\0';
+     }
+
+     const char*get_truncated_file_name()const{
+         return file_name;
+     }
+
+     void set_file_line(unsigned file_line){
+         this->file_line = file_line;
+     }
+
+     unsigned get_file_line()const{
+         return file_line;
+     }
+
+     // Returns the next line as a NUL-terminated string without its line
+     // break, or a null pointer when the input is exhausted. The pointer
+     // refers to the internal buffer, which later calls overwrite.
+     // Throws error::line_length_limit_exceeded for lines that do not fit
+     // into one block.
+     char*next_line(){
+         if(data_begin == data_end)
+             return 0;
+
+         ++file_line;
+
+         assert(data_begin < data_end);
+         assert(data_end <= block_len*2);
+
+         if(data_begin >= block_len){
+             // The first block is fully consumed: slide the second block
+             // down and append the chunk that was fetched in the meantime.
+             std::memcpy(buffer.get(), buffer.get()+block_len, block_len);
+             data_begin -= block_len;
+             data_end -= block_len;
+             if(reader.is_valid())
+             {
+                 data_end += reader.finish_read();
+                 std::memcpy(buffer.get()+block_len, buffer.get()+2*block_len, block_len);
+                 reader.start_read(buffer.get() + 2*block_len, block_len);
+             }
+         }
+
+         // The bounds check has to come first: buffer[data_end] is not part
+         // of the data and may hold a stale '\n' from an earlier block.
+         int line_end = data_begin;
+         while(line_end != data_end && buffer[line_end] != '\n'){
+             ++line_end;
+         }
+
+         if(line_end - data_begin + 1 > block_len){
+             error::line_length_limit_exceeded err;
+             err.set_file_name(file_name);
+             err.set_file_line(file_line);
+             throw err;
+         }
+
+         if(line_end != data_end && buffer[line_end] == '\n'){
+             buffer[line_end] = '\0';
+         }else{
+             // some files are missing the newline at the end of the
+             // last line; pretend there was one so that data_begin
+             // meets data_end on the next call
+             ++data_end;
+             buffer[line_end] = '\0';
+         }
+
+         // handle windows \r\n-line breaks
+         if(line_end != data_begin && buffer[line_end-1] == '\r')
+             buffer[line_end-1] = '\0';
+
+         char*ret = buffer.get() + data_begin;
+         data_begin = line_end+1;
+         return ret;
+     }
+
+     // No user-written destructor: `reader` is destroyed first (joining its
+     // worker thread), then `buffer` releases the memory.
+ };
+
+
+ ////////////////////////////////////////////////////////////////////////////
+ // CSV //
+ ////////////////////////////////////////////////////////////////////////////
+
+ // Exception types raised while interpreting CSV content. Each one combines
+ // error::base with the mix-ins whose fields appear in its message.
+ namespace error{
+     // Longest column name kept in an error; longer names are truncated.
+     const int max_column_name_length = 63;
+
+     // Mix-in storing a (possibly truncated) copy of a column name.
+     struct with_column_name{
+         char column_name[max_column_name_length+1];
+
+         with_column_name(){
+             std::memset(column_name, 0, max_column_name_length+1);
+         }
+
+         void set_column_name(const char*column_name){
+             std::strncpy(this->column_name, column_name, max_column_name_length);
+             this->column_name[max_column_name_length] = '\0';
+         }
+     };
+
+
+     // Longest cell content kept in an error; longer content is truncated.
+     const int max_column_content_length = 63;
+
+     // Mix-in storing a (possibly truncated) copy of the offending cell.
+     struct with_column_content{
+         char column_content[max_column_content_length+1];
+
+         with_column_content(){
+             std::memset(column_content, 0, max_column_content_length+1);
+         }
+
+         void set_column_content(const char*column_content){
+             std::strncpy(this->column_content, column_content, max_column_content_length);
+             this->column_content[max_column_content_length] = '\0';
+         }
+     };
+
+
+     // ---- header problems -------------------------------------------------
+
+     struct extra_column_in_header : base, with_file_name, with_column_name{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "Extra column \"%s\" in header of file \"%s\".",
+                 column_name, file_name);
+         }
+     };
+
+     struct missing_column_in_header : base, with_file_name, with_column_name{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "Missing column \"%s\" in header of file \"%s\".",
+                 column_name, file_name);
+         }
+     };
+
+     struct duplicated_column_in_header : base, with_file_name, with_column_name{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "Duplicated column \"%s\" in header of file \"%s\".",
+                 column_name, file_name);
+         }
+     };
+
+     struct header_missing : base, with_file_name{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "Header missing in file \"%s\".",
+                 file_name);
+         }
+     };
+
+     // ---- row structure problems ------------------------------------------
+
+     struct too_few_columns : base, with_file_name, with_file_line{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "Too few columns in line %d in file \"%s\".",
+                 file_line, file_name);
+         }
+     };
+
+     struct too_many_columns : base, with_file_name, with_file_line{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "Too many columns in line %d in file \"%s\".",
+                 file_line, file_name);
+         }
+     };
+
+     struct escaped_string_not_closed : base, with_file_name, with_file_line{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "Escaped string was not closed in line %d in file \"%s\".",
+                 file_line, file_name);
+         }
+     };
+
+     // ---- cell conversion problems ----------------------------------------
+
+     struct integer_must_be_positive :
+         base, with_file_name, with_file_line, with_column_name, with_column_content{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "The integer \"%s\" must be positive or 0 in column \"%s\" in file \"%s\" in line \"%d\".",
+                 column_content, column_name, file_name, file_line);
+         }
+     };
+
+     struct no_digit :
+         base, with_file_name, with_file_line, with_column_name, with_column_content{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "The integer \"%s\" contains an invalid digit in column \"%s\" in file \"%s\" in line \"%d\".",
+                 column_content, column_name, file_name, file_line);
+         }
+     };
+
+     struct integer_overflow :
+         base, with_file_name, with_file_line, with_column_name, with_column_content{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "The integer \"%s\" overflows in column \"%s\" in file \"%s\" in line \"%d\".",
+                 column_content, column_name, file_name, file_line);
+         }
+     };
+
+     struct integer_underflow :
+         base, with_file_name, with_file_line, with_column_name, with_column_content{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "The integer \"%s\" underflows in column \"%s\" in file \"%s\" in line \"%d\".",
+                 column_content, column_name, file_name, file_line);
+         }
+     };
+
+     struct invalid_single_character :
+         base, with_file_name, with_file_line, with_column_name, with_column_content{
+         void format_error_message()const override{
+             std::snprintf(
+                 error_message_buffer, sizeof(error_message_buffer),
+                 "The content \"%s\" of column \"%s\" in file \"%s\" in line \"%d\" is not a single character.",
+                 column_content, column_name, file_name, file_line);
+         }
+     };
+ }
+
+ // Bit flags telling the header parser which column mismatches to tolerate.
+ // The values are distinct bits, so they can be combined with |.
+ using ignore_column = unsigned;
+ static const ignore_column ignore_no_column = 0;      // tolerate nothing
+ static const ignore_column ignore_extra_column = 1;   // tolerate file columns nobody asked for
+ static const ignore_column ignore_missing_column = 2; // tolerate requested columns absent from the file
+
+ // Trim policy: removes every leading and trailing character of a column
+ // that is listed in trim_char_list.
+ template<char ... trim_char_list>
+ struct trim_chars{
+ private:
+     // End of the recursion: no candidates are left, so c is kept.
+     constexpr static bool is_trim_char(char){
+         return false;
+     }
+
+     // True when c equals trim_char or any of the remaining candidates.
+     template<class ...OtherTrimChars>
+     constexpr static bool is_trim_char(char c, char trim_char, OtherTrimChars...other_trim_chars){
+         return c == trim_char || is_trim_char(c, other_trim_chars...);
+     }
+
+ public:
+     // Narrows [str_begin, str_end) to the untrimmed part and writes a
+     // terminating NUL at the new end. Both pointers are updated in place.
+     // The emptiness check comes first in both loops so that an empty range
+     // is never dereferenced; in particular *(str_end-1) would otherwise
+     // read the byte in front of the column.
+     static void trim(char*&str_begin, char*&str_end){
+         while(str_begin != str_end && is_trim_char(*str_begin, trim_char_list...))
+             ++str_begin;
+         while(str_begin != str_end && is_trim_char(*(str_end-1), trim_char_list...))
+             --str_end;
+         *str_end = '\0';
+     }
+ };
+
+
+ // Comment policy that never classifies a line as a comment.
+ struct no_comment{
+     static bool is_comment(const char*){ return false; }
+ };
+
+ // Comment policy: a line is a comment when its first character is one of
+ // comment_start_char_list.
+ template<char ... comment_start_char_list>
+ struct single_line_comment{
+ private:
+     // End of the recursion: nothing left to compare against.
+     constexpr static bool is_comment_start_char(char){
+         return false;
+     }
+
+     // Compares c with the first candidate, then recurses into the rest.
+     template<class ...Rest>
+     constexpr static bool is_comment_start_char(char c, char candidate, Rest...rest){
+         return candidate == c || is_comment_start_char(c, rest...);
+     }
+
+ public:
+
+     static bool is_comment(const char*line){
+         const char first = *line;
+         return is_comment_start_char(first, comment_start_char_list...);
+     }
+ };
+
+ // Comment policy: a line is a comment when it is empty or consists only of
+ // spaces and tabs.
+ struct empty_line_comment{
+     static bool is_comment(const char*line){
+         // Skip the leading blanks; the line is "empty" exactly when that
+         // brings us to the terminating NUL.
+         const char*cursor = line;
+         while(*cursor == ' ' || *cursor == '\t')
+             ++cursor;
+         return *cursor == '\0';
+     }
+ };
+
+ // Comment policy combining single_line_comment and empty_line_comment: a
+ // line is skipped when either of the two would skip it.
+ template<char ... comment_start_char_list>
+ struct single_and_empty_line_comment{
+     static bool is_comment(const char*line){
+         if(single_line_comment<comment_start_char_list...>::is_comment(line))
+             return true;
+         return empty_line_comment::is_comment(line);
+     }
+ };
+
+ // Quote policy without any quoting: every occurrence of sep ends a column.
+ template<char sep>
+ struct no_quote_escape{
+     // Returns a pointer to the first sep or to the terminating NUL,
+     // whichever comes first.
+     static const char*find_next_column_end(const char*col_begin){
+         const char*cursor = col_begin;
+         for(; *cursor != sep && *cursor != '\0'; ++cursor){}
+         return cursor;
+     }
+
+     // Nothing to undo, the column is used as it is.
+     static void unescape(char*&, char*&){}
+ };
+
+ // Quote policy for columns that may be wrapped in `quote` characters.
+ // Inside a quoted section sep is ordinary text and a doubled quote stands
+ // for one literal quote.
+ template<char sep, char quote>
+ struct double_quote_escape{
+     // Returns a pointer to the sep that ends the column, or to the
+     // terminating NUL. Throws error::escaped_string_not_closed when the
+     // line ends inside a quoted section.
+     static const char*find_next_column_end(const char*col_begin){
+         while(*col_begin != sep && *col_begin != '\0')
+             if(*col_begin != quote)
+                 ++col_begin;
+             else{
+                 // Skip one quoted section per iteration; a quote directly
+                 // after the closing quote opens the next section.
+                 do{
+                     ++col_begin;
+                     while(*col_begin != quote){
+                         if(*col_begin == '\0')
+                             throw error::escaped_string_not_closed();
+                         ++col_begin;
+                     }
+                     ++col_begin;
+                 }while(*col_begin == quote);
+             }
+         return col_begin;
+     }
+
+     // If [col_begin, col_end) starts and ends with a quote, drops the
+     // surrounding quotes, collapses doubled quotes and NUL-terminates the
+     // shortened column. Columns that are not quoted are left untouched.
+     static void unescape(char*&col_begin, char*&col_end){
+         if(col_end - col_begin >= 2){
+             if(*col_begin == quote && *(col_end-1) == quote){
+                 ++col_begin;
+                 --col_end;
+                 char*out = col_begin;
+                 for(char*in = col_begin; in!=col_end; ++in){
+                     // *col_end is the closing quote, so the last content
+                     // character must not be paired with it: that would move
+                     // `in` past col_end and the loop would never stop.
+                     if(*in == quote && (in+1) != col_end && *(in+1) == quote){
+                         ++in;
+                     }
+                     *out = *in;
+                     ++out;
+                 }
+                 col_end = out;
+                 *col_end = '\0';
+             }
+         }
+
+     }
+ };
+
+ // Overflow policy: out-of-range integers are reported as exceptions.
+ struct throw_on_overflow{
+     // Called when the parsed value is too large for T.
+     template<class T>
+     static void on_overflow(T&){ throw error::integer_overflow(); }
+
+     // Called when the parsed value is too small for T.
+     template<class T>
+     static void on_underflow(T&){ throw error::integer_underflow(); }
+ };
+
+ // Overflow policy: out-of-range integers are silently accepted; the
+ // variable keeps the value accumulated before the range was exceeded.
+ struct ignore_overflow{
+     template<class T> static void on_overflow(T&){}
+     template<class T> static void on_underflow(T&){}
+ };
+
+ // Overflow policy: out-of-range integers saturate at the limits of T.
+ struct set_to_max_on_overflow{
+     // Too large: store the largest value T can hold.
+     template<class T>
+     static void on_overflow(T&x){ x = std::numeric_limits<T>::max(); }
+
+     // Too small: store the smallest value T can hold.
+     template<class T>
+     static void on_underflow(T&x){ x = std::numeric_limits<T>::min(); }
+ };
+
+
+ namespace detail{
+ // Cuts the first column off `line`. On return [col_begin, col_end) is that
+ // column, NUL-terminated in place, and `line` points behind the separator,
+ // or is null when this was the last column of the line.
+ template<class quote_policy>
+ void chop_next_column(
+     char*&line, char*&col_begin, char*&col_end
+ ){
+     assert(line != nullptr);
+
+     col_begin = line;
+     // The policy works on const char*; going through the column length
+     // yields a writable pointer to the same position.
+     const auto column_length = quote_policy::find_next_column_end(col_begin) - col_begin;
+     col_end = col_begin + column_length;
+
+     const bool is_last_column = (*col_end == '\0');
+     if(!is_last_column)
+         *col_end = '\0';
+     line = is_last_column ? nullptr : col_end + 1;
+ }
+
+ // Splits one data line into columns. col_order has one entry per column of
+ // the file: the slot of sorted_col that receives the column, or -1 when the
+ // column is skipped. Throws error::too_few_columns / error::too_many_columns
+ // when the line does not have exactly col_order.size() columns.
+ template<class trim_policy, class quote_policy>
+ void parse_line(
+     char*line,
+     char**sorted_col,
+     const std::vector<int>&col_order
+ ){
+     for(const int target_slot : col_order){
+         if(line == nullptr)
+             throw ::io::error::too_few_columns();
+
+         char*col_begin;
+         char*col_end;
+         chop_next_column<quote_policy>(line, col_begin, col_end);
+
+         // Skipped columns are chopped off but neither trimmed nor unescaped.
+         if(target_slot == -1)
+             continue;
+
+         trim_policy::trim(col_begin, col_end);
+         quote_policy::unescape(col_begin, col_end);
+         sorted_col[target_slot] = col_begin;
+     }
+     if(line != nullptr)
+         throw ::io::error::too_many_columns();
+ }
+
+ // Matches the header line against the column_count names in col_name and
+ // rebuilds col_order: one entry per header column, holding the index of the
+ // matching name or -1 for a tolerated extra column.
+ // Throws error::duplicated_column_in_header, error::extra_column_in_header
+ // or error::missing_column_in_header, depending on ignore_policy.
+ template<unsigned column_count, class trim_policy, class quote_policy>
+ void parse_header_line(
+     char*line,
+     std::vector<int>&col_order,
+     const std::string*col_name,
+     ignore_column ignore_policy
+ ){
+     col_order.clear();
+
+     // seen[i] records whether col_name[i] already occurred in the header.
+     bool seen[column_count];
+     for(unsigned i=0; i<column_count; ++i)
+         seen[i] = false;
+
+     while(line){
+         char*col_begin;
+         char*col_end;
+         chop_next_column<quote_policy>(line, col_begin, col_end);
+
+         trim_policy::trim(col_begin, col_end);
+         quote_policy::unescape(col_begin, col_end);
+
+         bool matched = false;
+         for(unsigned i=0; i<column_count && !matched; ++i){
+             if(col_name[i] != col_begin)
+                 continue;
+             if(seen[i]){
+                 error::duplicated_column_in_header err;
+                 err.set_column_name(col_begin);
+                 throw err;
+             }
+             seen[i] = true;
+             col_order.push_back(i);
+             matched = true;
+         }
+
+         if(matched)
+             continue;
+
+         // The header names a column the caller did not ask for.
+         if(ignore_policy & ::io::ignore_extra_column){
+             col_order.push_back(-1);
+         }else{
+             error::extra_column_in_header err;
+             err.set_column_name(col_begin);
+             throw err;
+         }
+     }
+
+     if(ignore_policy & ::io::ignore_missing_column)
+         return;
+
+     for(unsigned i=0; i<column_count; ++i){
+         if(seen[i])
+             continue;
+         error::missing_column_in_header err;
+         err.set_column_name(col_name[i].c_str());
+         throw err;
+     }
+ }
+
+ // Converts a column that must consist of exactly one character.
+ // Throws error::invalid_single_character for an empty or longer column;
+ // in the latter case x has already received the first character.
+ template<class overflow_policy>
+ void parse(char*col, char &x){
+     if(col[0] == '\0')
+         throw error::invalid_single_character();
+     x = col[0];
+     if(col[1] != '\0')
+         throw error::invalid_single_character();
+ }
+
+ // String-like targets take the column text as it is.
+
+ // std::string receives its own copy of the text.
+ template<class overflow_policy>
+ void parse(char*col, std::string&x){
+     x.assign(col);
+ }
+
+ // Pointer targets refer to the column text itself; nothing is copied.
+ template<class overflow_policy>
+ void parse(char*col, const char*&x){
+     x = col;
+ }
+
+ template<class overflow_policy>
+ void parse(char*col, char*&x){
+     x = col;
+ }
+
+ // Converts a string of decimal digits to T. An empty string yields 0.
+ // Throws error::no_digit on any other character. When the next digit would
+ // exceed the range of T, overflow_policy::on_overflow(x) decides what
+ // happens and parsing stops.
+ template<class overflow_policy, class T>
+ void parse_unsigned_integer(const char*col, T&x){
+     x = 0;
+     for(; *col != '\0'; ++col){
+         if(*col < '0' || '9' < *col)
+             throw error::no_digit();
+
+         T y = *col - '0';
+         // 10*x+y fits into T exactly when x <= (max-y)/10.
+         if(x > (std::numeric_limits<T>::max()-y)/10){
+             overflow_policy::on_overflow(x);
+             return;
+         }
+         x = 10*x+y;
+     }
+ }
+
+ // parse() overloads for the unsigned integer types; all of them forward to
+ // parse_unsigned_integer().
+ template<class overflow_policy>
+ void parse(char*col, unsigned char &x){
+     parse_unsigned_integer<overflow_policy>(col, x);
+ }
+ template<class overflow_policy>
+ void parse(char*col, unsigned short &x){
+     parse_unsigned_integer<overflow_policy>(col, x);
+ }
+ template<class overflow_policy>
+ void parse(char*col, unsigned int &x){
+     parse_unsigned_integer<overflow_policy>(col, x);
+ }
+ template<class overflow_policy>
+ void parse(char*col, unsigned long &x){
+     parse_unsigned_integer<overflow_policy>(col, x);
+ }
+ template<class overflow_policy>
+ void parse(char*col, unsigned long long &x){
+     parse_unsigned_integer<overflow_policy>(col, x);
+ }
+
+ // Converts an optionally signed string of decimal digits to T.
+ // Throws error::no_digit on any other character. Values outside the range
+ // of T are handed to overflow_policy (on_underflow for negative input,
+ // on_overflow otherwise) and parsing stops.
+ template<class overflow_policy, class T>
+ void parse_signed_integer(const char*col, T&x){
+     if(*col != '-'){
+         // Non-negative input: drop an optional '+' and reuse the
+         // unsigned parser.
+         if(*col == '+')
+             ++col;
+         parse_unsigned_integer<overflow_policy>(col, x);
+         return;
+     }
+
+     // Negative input is accumulated downwards from 0, so the most negative
+     // value of T can be reached.
+     x = 0;
+     for(++col; *col != '\0'; ++col){
+         if(*col < '0' || '9' < *col)
+             throw error::no_digit();
+
+         T y = *col - '0';
+         if(x < (std::numeric_limits<T>::min()+y)/10){
+             overflow_policy::on_underflow(x);
+             return;
+         }
+         x = 10*x-y;
+     }
+ }
+
+ // parse() overloads for the signed integer types; all of them forward to
+ // parse_signed_integer().
+ template<class overflow_policy>
+ void parse(char*col, signed char &x){
+     parse_signed_integer<overflow_policy>(col, x);
+ }
+ template<class overflow_policy>
+ void parse(char*col, signed short &x){
+     parse_signed_integer<overflow_policy>(col, x);
+ }
+ template<class overflow_policy>
+ void parse(char*col, signed int &x){
+     parse_signed_integer<overflow_policy>(col, x);
+ }
+ template<class overflow_policy>
+ void parse(char*col, signed long &x){
+     parse_signed_integer<overflow_policy>(col, x);
+ }
+ template<class overflow_policy>
+ void parse(char*col, signed long long &x){
+     parse_signed_integer<overflow_policy>(col, x);
+ }
+
+ // Parses a decimal floating-point literal from the NUL-terminated string
+ // col into x.
+ //
+ // Accepted form: optional '+' or '-', integer digits, optional fraction
+ // introduced by '.' or ',', optional exponent introduced by 'e' or 'E'
+ // (itself an optionally signed integer). Every part may be empty, so an
+ // empty string parses as 0.
+ //
+ // Throws error::no_digit if anything other than an exponent follows the
+ // mantissa, or if the exponent contains a non-digit character.
+ template<class T>
+ void parse_float(const char*col, T&x){
+     bool is_neg = false;
+     if(*col == '-'){
+         is_neg = true;
+         ++col;
+     }else if(*col == '+')
+         ++col;
+
+     // Integer part.
+     x = 0;
+     while('0' <= *col && *col <= '9'){
+         int y = *col - '0';
+         x *= 10;
+         x += y;
+         ++col;
+     }
+
+     // Fractional part; both '.' and ',' are accepted as separator.
+     if(*col == '.'|| *col == ','){
+         ++col;
+         T pos = 1;
+         while('0' <= *col && *col <= '9'){
+             pos /= 10;
+             int y = *col - '0';
+             ++col;
+             x += y*pos;
+         }
+     }
+
+     if(*col == 'e' || *col == 'E'){
+         ++col;
+         int e;
+
+         // set_to_max_on_overflow clamps an out-of-range exponent to the
+         // largest / smallest int instead of throwing.
+         parse_signed_integer<set_to_max_on_overflow>(col, e);
+
+         if(e != 0){
+             T base;
+             // Take the magnitude as unsigned: negating the smallest int
+             // (which the clamping above can produce) overflows a signed
+             // int, whereas unsigned arithmetic is well defined.
+             unsigned n;
+             if(e < 0){
+                 base = 0.1;
+                 n = 0u - static_cast<unsigned>(e);
+             }else{
+                 base = 10;
+                 n = static_cast<unsigned>(e);
+             }
+
+             // Exponentiation by squaring: scales x by base^n.
+             while(n != 1){
+                 if((n & 1) == 0){
+                     base = base*base;
+                     n >>= 1;
+                 }else{
+                     x *= base;
+                     --n;
+                 }
+             }
+             x *= base;
+         }
+     }else{
+         if(*col != '\0')
+             throw error::no_digit();
+     }
+
+     if(is_neg)
+         x = -x;
+ }
+
+ template<class overflow_policy> void parse(char*col, float&x) { parse_float(col, x); } // Floating-point overloads forward to parse_float;
+ template<class overflow_policy> void parse(char*col, double&x) { parse_float(col, x); } // overflow_policy is not passed on, since
+ template<class overflow_policy> void parse(char*col, long double&x) { parse_float(col, x); } // parse_float takes no policy parameter.
+
+ template<class overflow_policy, class T> // Catch-all overload: instantiating it always fails with the message below.
+ void parse(char*col, T&x){
+ // GCC evaluates "false" when reading the template and
+ // "sizeof(T)!=sizeof(T)" only when instantiating it. This is why
+ // this strange construct is used.
+ static_assert(sizeof(T)!=sizeof(T),
+ "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported");
+ }
+
+ }
+
+ template<unsigned column_count,
+ class trim_policy = trim_chars<' ', '\t'>,
+ class quote_policy = no_quote_escape<','>,
+ class overflow_policy = throw_on_overflow,
+ class comment_policy = no_comment
+ >
+ class CSVReader{
+ private:
+ LineReader in;
+
+ char*(row[column_count]);
+ std::string column_names[column_count];
+
+ std::vector<int>col_order;
+
+ template<class ...ColNames> // Stores the given names into column_names, one name per recursion step.
+ void set_column_names(std::string s, ColNames...cols){
+ column_names[column_count-sizeof...(ColNames)-1] = std::move(s); // index = column_count - (names still to come) - 1; 0 for the first of column_count names
+ set_column_names(std::forward<ColNames>(cols)...);
+ }
+
+ void set_column_names(){} // recursion terminator: no names left
+
+
+ public:
+ // A reader cannot be default-constructed or copied.
+ CSVReader() = delete;
+ CSVReader(const CSVReader&) = delete;
+ // Deleted like the copy constructor, so that an attempted assignment is
+ // rejected at compile time instead of failing at link time.
+ CSVReader&operator=(const CSVReader&) = delete;
+
+ template<class ...Args> // Forwards every constructor argument to the LineReader member `in`.
+ explicit CSVReader(Args&&...args):in(std::forward<Args>(args)...){
+ std::fill(row, row+column_count, nullptr); // no column pointers until a row has been read
+ col_order.resize(column_count);
+ for(unsigned i=0; i<column_count; ++i)
+ col_order[i] = i; // identity column order by default
+ for(unsigned i=1; i<=column_count; ++i)
+ column_names[i-1] = "col"+std::to_string(i); // default names: col1, col2, ...
+ }
char*next_line(){
- if(data_begin == data_end)
- return 0;
-
- ++file_line;
-
- assert(data_begin < data_end);
- assert(data_end <= block_len*2);
-
- if(data_begin >= block_len){
- std::memcpy(buffer, buffer+block_len, block_len);
- data_begin -= block_len;
- data_end -= block_len;
- #ifndef CSV_IO_NO_THREAD
- if(bytes_read.valid())
- #endif
- {
- #ifndef CSV_IO_NO_THREAD
- data_end += bytes_read.get();
- #else
- data_end += std::fread(buffer + 2*block_len, 1, block_len, file);
- #endif
- std::memcpy(buffer+block_len, buffer+2*block_len, block_len);
-
- #ifndef CSV_IO_NO_THREAD
- bytes_read = std::async(std::launch::async, [=]()->int{
- return std::fread(buffer + 2*block_len, 1, block_len, file);
- });
- #endif
- }
- }
-
- int line_end = data_begin;
- while(buffer[line_end] != '\n' && line_end != data_end){
- ++line_end;
- }
-
- if(line_end - data_begin + 1 > block_len){
- error::line_length_limit_exceeded err;
- err.set_file_name(file_name);
- err.set_file_line(file_line);
- throw err;
- }
-
- if(buffer[line_end] == '\n'){
- buffer[line_end] = '\0';
- }else{
- // some files are missing the newline at the end of the
- // last line
- ++data_end;
- buffer[line_end] = '\0';
- }
-
- // handle windows \r\n-line breaks
- if(line_end != data_begin && buffer[line_end-1] == '\r')
- buffer[line_end-1] = '\0';
-
- char*ret = buffer + data_begin;
- data_begin = line_end+1;
- return ret;
- }
-
- ~LineReader(){
- #ifndef CSV_IO_NO_THREAD
- // GCC needs this or it will crash.
- if(bytes_read.valid())
- bytes_read.get();
- #endif
-
- delete[] buffer;
- std::fclose(file);
+ return in.next_line();
}
- };
-
- ////////////////////////////////////////////////////////////////////////////
- // CSV //
- ////////////////////////////////////////////////////////////////////////////
-
- namespace error{
- const int max_column_name_length = 63;
- struct with_column_name{
- with_column_name(){
- std::memset(column_name, 0, max_column_name_length+1);
- }
-
- void set_column_name(const char*column_name){
- std::strncpy(this->column_name, column_name, max_column_name_length);
- this->column_name[max_column_name_length] = '\0';
- }
-
- char column_name[max_column_name_length+1];
- };
-
-
- const int max_column_content_length = 63;
-
- struct with_column_content{
- with_column_content(){
- std::memset(column_content, 0, max_column_content_length+1);
- }
-
- void set_column_content(const char*column_content){
- std::strncpy(this->column_content, column_content, max_column_content_length);
- this->column_content[max_column_content_length] = '\0';
- }
-
- char column_content[max_column_content_length+1];
- };
-
-
- struct extra_column_in_header :
- base,
- with_file_name,
- with_column_name{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "Extra column \"%s\" in header of file \"%s\"."
- , column_name, file_name);
- }
- };
-
- struct missing_column_in_header :
- base,
- with_file_name,
- with_column_name{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "Missing column \"%s\" in header of file \"%s\"."
- , column_name, file_name);
- }
- };
-
- struct duplicated_column_in_header :
- base,
- with_file_name,
- with_column_name{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "Duplicated column \"%s\" in header of file \"%s\"."
- , column_name, file_name);
- }
- };
-
- struct header_missing :
- base,
- with_file_name{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "Header missing in file \"%s\"."
- , file_name);
- }
- };
-
- struct too_few_columns :
- base,
- with_file_name,
- with_file_line{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "Too few columns in line %d in file \"%s\"."
- , file_line, file_name);
- }
- };
-
- struct too_many_columns :
- base,
- with_file_name,
- with_file_line{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "Too many columns in line %d in file \"%s\"."
- , file_line, file_name);
- }
- };
-
- struct escaped_string_not_closed :
- base,
- with_file_name,
- with_file_line{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "Escaped string was not closed in line %d in file \"%s\"."
- , file_line, file_name);
- }
- };
-
- struct integer_must_be_positive :
- base,
- with_file_name,
- with_file_line,
- with_column_name,
- with_column_content{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "The integer \"%s\" must be positive or 0 in column \"%s\" in file \"%s\" in line \"%d\"."
- , column_content, column_name, file_name, file_line);
- }
- };
-
- struct no_digit :
- base,
- with_file_name,
- with_file_line,
- with_column_name,
- with_column_content{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "The integer \"%s\" contains an invalid digit in column \"%s\" in file \"%s\" in line \"%d\"."
- , column_content, column_name, file_name, file_line);
- }
- };
-
- struct integer_overflow :
- base,
- with_file_name,
- with_file_line,
- with_column_name,
- with_column_content{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "The integer \"%s\" overflows in column \"%s\" in file \"%s\" in line \"%d\"."
- , column_content, column_name, file_name, file_line);
- }
- };
-
- struct integer_underflow :
- base,
- with_file_name,
- with_file_line,
- with_column_name,
- with_column_content{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "The integer \"%s\" underflows in column \"%s\" in file \"%s\" in line \"%d\"."
- , column_content, column_name, file_name, file_line);
- }
- };
-
- struct invalid_single_character :
- base,
- with_file_name,
- with_file_line,
- with_column_name,
- with_column_content{
- void format_error_message()const{
- std::snprintf(error_message_buffer, sizeof(error_message_buffer),
- "The content \"%s\" of column \"%s\" in file \"%s\" in line \"%d\" is not a single character."
- , column_content, column_name, file_name, file_line);
- }
- };
- }
-
- typedef unsigned ignore_column;
- static const ignore_column ignore_no_column = 0;
- static const ignore_column ignore_extra_column = 1;
- static const ignore_column ignore_missing_column = 2;
-
- template<char ... trim_char_list>
- struct trim_chars{
- private:
- constexpr static bool is_trim_char(char c){
- return false;
- }
-
- template<class ...OtherTrimChars>
- constexpr static bool is_trim_char(char c, char trim_char, OtherTrimChars...other_trim_chars){
- return c == trim_char || is_trim_char(c, other_trim_chars...);
- }
-
- public:
- static void trim(char*&str_begin, char*&str_end){
- while(is_trim_char(*str_begin, trim_char_list...) && str_begin != str_end)
- ++str_begin;
- while(is_trim_char(*(str_end-1), trim_char_list...) && str_begin != str_end)
- --str_end;
- *str_end = '\0';
- }
- };
-
-
- struct no_comment{
- static bool is_comment(const char*line){
- return false;
- }
- };
- template<char ... comment_start_char_list>
- struct single_line_comment{
- private:
- constexpr static bool is_comment_start_char(char c){
- return false;
- }
-
- template<class ...OtherCommentStartChars>
- constexpr static bool is_comment_start_char(char c, char comment_start_char, OtherCommentStartChars...other_comment_start_chars){
- return c == comment_start_char || is_comment_start_char(c, other_comment_start_chars...);
- }
-
- public:
-
- static bool is_comment(const char*line){
- return is_comment_start_char(*line, comment_start_char_list...);
- }
- };
-
- struct empty_line_comment{
- static bool is_comment(const char*line){
- if(*line == '\0')
- return true;
- while(*line == ' ' || *line == '\t'){
- ++line;
- if(*line == 0)
- return true;
- }
- return false;
- }
- };
-
- template<char ... comment_start_char_list>
- struct single_and_empty_line_comment{
- static bool is_comment(const char*line){
- return single_line_comment<comment_start_char_list...>::is_comment(line) || empty_line_comment::is_comment(line);
- }
- };
-
- template<char sep>
- struct no_quote_escape{
- static const char*find_next_column_end(const char*col_begin){
- while(*col_begin != sep && *col_begin != '\0')
- ++col_begin;
- return col_begin;
- }
-
- static void unescape(char*&col_begin, char*&col_end){
-
- }
- };
-
- template<char sep, char quote>
- struct double_quote_escape{
- static const char*find_next_column_end(const char*col_begin){
- while(*col_begin != sep && *col_begin != '\0')
- if(*col_begin != quote)
- ++col_begin;
- else{
- do{
- ++col_begin;
- while(*col_begin != quote){
- if(*col_begin == '\0')
- throw error::escaped_string_not_closed();
- ++col_begin;
- }
- ++col_begin;
- }while(*col_begin == quote);
- }
- return col_begin;
- }
-
- static void unescape(char*&col_begin, char*&col_end){
- if(col_end - col_begin >= 2){
- if(*col_begin == quote && *(col_end-1) == quote){
- ++col_begin;
- --col_end;
- char*out = col_begin;
- for(char*in = col_begin; in!=col_end; ++in){
- if(*in == quote && *(in+1) == quote){
- continue;
- }
- *out = *in;
- ++out;
- }
- col_end = out;
- *col_end = '\0';
- }
- }
-
- }
- };
-
- struct throw_on_overflow{
- template<class T>
- static void on_overflow(T&){
- throw error::integer_overflow();
- }
-
- template<class T>
- static void on_underflow(T&){
- throw error::integer_underflow();
- }
- };
-
- struct ignore_overflow{
- template<class T>
- static void on_overflow(T&){}
-
- template<class T>
- static void on_underflow(T&){}
- };
-
- struct set_to_max_on_overflow{
- template<class T>
- static void on_overflow(T&x){
- x = std::numeric_limits<T>::max();
- }
-
- template<class T>
- static void on_underflow(T&x){
- x = std::numeric_limits<T>::min();
- }
- };
-
-
- namespace detail{
- template<class quote_policy>
- void chop_next_column(
- char*&line, char*&col_begin, char*&col_end
- ){
- assert(line != nullptr);
-
- col_begin = line;
- // the col_begin + (... - col_begin) removes the constness
- col_end = col_begin + (quote_policy::find_next_column_end(col_begin) - col_begin);
-
- if(*col_end == '\0'){
- line = nullptr;
- }else{
- *col_end = '\0';
- line = col_end + 1;
- }
- }
-
- template<class trim_policy, class quote_policy>
- void parse_line(
- char*line,
- char**sorted_col,
- const std::vector<int>&col_order
- ){
- for(std::size_t i=0; i<col_order.size(); ++i){
- if(line == nullptr)
- throw io::error::too_few_columns();
- char*col_begin, *col_end;
- chop_next_column<quote_policy>(line, col_begin, col_end);
-
- if(col_order[i] != -1){
- trim_policy::trim(col_begin, col_end);
- quote_policy::unescape(col_begin, col_end);
-
- sorted_col[col_order[i]] = col_begin;
- }
- }
- if(line != nullptr)
- throw io::error::too_many_columns();
- }
-
- template<unsigned column_count, class trim_policy, class quote_policy>
- void parse_header_line(
- char*line,
- std::vector<int>&col_order,
- const std::string*col_name,
- ignore_column ignore_policy
- ){
- col_order.clear();
-
- bool found[column_count];
- std::fill(found, found + column_count, false);
- while(line){
- char*col_begin,*col_end;
- chop_next_column<quote_policy>(line, col_begin, col_end);
-
- trim_policy::trim(col_begin, col_end);
- quote_policy::unescape(col_begin, col_end);
-
- for(unsigned i=0; i<column_count; ++i)
- if(col_begin == col_name[i]){
- if(found[i]){
- error::duplicated_column_in_header err;
- err.set_column_name(col_begin);
- throw err;
- }
- found[i] = true;
- col_order.push_back(i);
- col_begin = 0;
- break;
- }
- if(col_begin){
- if(ignore_policy & io::ignore_extra_column)
- col_order.push_back(-1);
- else{
- error::extra_column_in_header err;
- err.set_column_name(col_begin);
- throw err;
- }
- }
- }
- if(!(ignore_policy & io::ignore_missing_column)){
- for(unsigned i=0; i<column_count; ++i){
- if(!found[i]){
- error::missing_column_in_header err;
- err.set_column_name(col_name[i].c_str());
- throw err;
- }
- }
- }
- }
-
- template<class overflow_policy>
- void parse(char*col, char &x){
- if(!*col)
- throw error::invalid_single_character();
- x = *col;
- ++col;
- if(*col)
- throw error::invalid_single_character();
- }
-
- template<class overflow_policy>
- void parse(char*col, std::string&x){
- x = col;
- }
-
- template<class overflow_policy>
- void parse(char*col, const char*&x){
- x = col;
- }
-
- template<class overflow_policy>
- void parse(char*col, char*&x){
- x = col;
- }
-
- template<class overflow_policy, class T>
- void parse_unsigned_integer(const char*col, T&x){
- x = 0;
- while(*col != '\0'){
- if('0' <= *col && *col <= '9'){
- T y = *col - '0';
- if(x > (std::numeric_limits<T>::max()-y)/10){
- overflow_policy::on_overflow(x);
- return;
- }
- x = 10*x+y;
- }else
- throw error::no_digit();
- ++col;
- }
- }
-
- template<class overflow_policy>void parse(char*col, unsigned char &x)
- {parse_unsigned_integer<overflow_policy>(col, x);}
- template<class overflow_policy>void parse(char*col, unsigned short &x)
- {parse_unsigned_integer<overflow_policy>(col, x);}
- template<class overflow_policy>void parse(char*col, unsigned int &x)
- {parse_unsigned_integer<overflow_policy>(col, x);}
- template<class overflow_policy>void parse(char*col, unsigned long &x)
- {parse_unsigned_integer<overflow_policy>(col, x);}
- template<class overflow_policy>void parse(char*col, unsigned long long &x)
- {parse_unsigned_integer<overflow_policy>(col, x);}
-
- template<class overflow_policy, class T>
- void parse_signed_integer(const char*col, T&x){
- if(*col == '-'){
- ++col;
-
- x = 0;
- while(*col != '\0'){
- if('0' <= *col && *col <= '9'){
- T y = *col - '0';
- if(x < (std::numeric_limits<T>::min()+y)/10){
- overflow_policy::on_underflow(x);
- return;
- }
- x = 10*x-y;
- }else
- throw error::no_digit();
- ++col;
- }
- return;
- }else if(*col == '+')
- ++col;
- parse_unsigned_integer<overflow_policy>(col, x);
- }
-
- template<class overflow_policy>void parse(char*col, signed char &x)
- {parse_signed_integer<overflow_policy>(col, x);}
- template<class overflow_policy>void parse(char*col, signed short &x)
- {parse_signed_integer<overflow_policy>(col, x);}
- template<class overflow_policy>void parse(char*col, signed int &x)
- {parse_signed_integer<overflow_policy>(col, x);}
- template<class overflow_policy>void parse(char*col, signed long &x)
- {parse_signed_integer<overflow_policy>(col, x);}
- template<class overflow_policy>void parse(char*col, signed long long &x)
- {parse_signed_integer<overflow_policy>(col, x);}
-
- template<class T>
- void parse_float(const char*col, T&x){
- bool is_neg = false;
- if(*col == '-'){
- is_neg = true;
- ++col;
- }else if(*col == '+')
- ++col;
-
- x = 0;
- while('0' <= *col && *col <= '9'){
- int y = *col - '0';
- x *= 10;
- x += y;
- ++col;
- }
-
- if(*col == '.'|| *col == ','){
- ++col;
- T pos = 1;
- while('0' <= *col && *col <= '9'){
- pos /= 10;
- int y = *col - '0';
- ++col;
- x += y*pos;
- }
- }
-
- if(*col == 'e' || *col == 'E'){
- ++col;
- int e;
-
- parse_signed_integer<set_to_max_on_overflow>(col, e);
-
- if(e != 0){
- T base;
- if(e < 0){
- base = 0.1;
- e = -e;
- }else{
- base = 10;
- }
-
- while(e != 1){
- if((e & 1) == 0){
- base = base*base;
- e >>= 1;
- }else{
- x *= base;
- --e;
- }
- }
- x *= base;
- }
- }else{
- if(*col != '\0')
- throw error::no_digit();
- }
-
- if(is_neg)
- x = -x;
- }
-
- template<class overflow_policy> void parse(char*col, float&x) { parse_float(col, x); }
- template<class overflow_policy> void parse(char*col, double&x) { parse_float(col, x); }
- template<class overflow_policy> void parse(char*col, long double&x) { parse_float(col, x); }
-
- template<class overflow_policy, class T>
- void parse(char*col, T&x){
- // GCC evalutes "false" when reading the template and
- // "sizeof(T)!=sizeof(T)" only when instantiating it. This is why
- // this strange construct is used.
- static_assert(sizeof(T)!=sizeof(T),
- "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported");
- }
-
- }
-
- template<unsigned column_count,
- class trim_policy = trim_chars<' ', '\t'>,
- class quote_policy = no_quote_escape<','>,
- class overflow_policy = throw_on_overflow,
- class comment_policy = no_comment
- >
- class CSVReader{
- private:
- LineReader in;
-
- char*(row[column_count]);
- std::string column_names[column_count];
-
- std::vector<int>col_order;
-
- template<class ...ColNames>
- void set_column_names(std::string s, ColNames...cols){
- column_names[column_count-sizeof...(ColNames)-1] = std::move(s);
- set_column_names(std::forward<ColNames>(cols)...);
- }
-
- void set_column_names(){}
-
-
- public:
- CSVReader() = delete;
- CSVReader(const CSVReader&) = delete;
- CSVReader&operator=(const CSVReader&);
-
- template<class ...Args>
- explicit CSVReader(Args...args):in(std::forward<Args>(args)...){
- std::fill(row, row+column_count, nullptr);
- col_order.resize(column_count);
- for(unsigned i=0; i<column_count; ++i)
- col_order[i] = i;
- for(unsigned i=1; i<=column_count; ++i)
- column_names[i-1] = "col"+std::to_string(i);
- }
-
- template<class ...ColNames>
- void read_header(ignore_column ignore_policy, ColNames...cols){
- static_assert(sizeof...(ColNames)>=column_count, "not enough column names specified");
- static_assert(sizeof...(ColNames)<=column_count, "too many column names specified");
- try{
- set_column_names(std::forward<ColNames>(cols)...);
-
- char*line;
- do{
- line = in.next_line();
- if(!line)
- throw error::header_missing();
- }while(comment_policy::is_comment(line));
-
- detail::parse_header_line
- <column_count, trim_policy, quote_policy>
- (line, col_order, column_names, ignore_policy);
- }catch(error::with_file_name&err){
- err.set_file_name(in.get_truncated_file_name());
- throw;
- }
- }
-
- template<class ...ColNames>
- void set_header(ColNames...cols){
- static_assert(sizeof...(ColNames)>=column_count,
- "not enough column names specified");
- static_assert(sizeof...(ColNames)<=column_count,
- "too many column names specified");
- set_column_names(std::forward<ColNames>(cols)...);
- std::fill(row, row+column_count, nullptr);
- col_order.resize(column_count);
- for(unsigned i=0; i<column_count; ++i)
- col_order[i] = i;
- }
-
- bool has_column(const std::string&name) const {
- return col_order.end() != std::find(
- col_order.begin(), col_order.end(),
- std::find(std::begin(column_names), std::end(column_names), name)
- - std::begin(column_names));
- }
-
- void set_file_name(const std::string&file_name){
- in.set_file_name(file_name);
- }
-
- void set_file_name(const char*file_name){
- in.set_file_name(file_name);
- }
-
- const char*get_truncated_file_name()const{
- return in.get_truncated_file_name();
- }
-
- void set_file_line(unsigned file_line){
- in.set_file_line(file_line);
- }
-
- unsigned get_file_line()const{
- return in.get_file_line();
- }
-
- private:
- void parse_helper(std::size_t r){}
-
- template<class T, class ...ColType>
- void parse_helper(std::size_t r, T&t, ColType&...cols){
- if(row[r]){
- try{
- try{
- row[r] = row[r];
- io::detail::parse<overflow_policy>(row[r], t);
- }catch(error::with_column_content&err){
- err.set_column_content(row[r]);
- throw;
- }
- }catch(error::with_column_name&err){
- err.set_column_name(column_names[r].c_str());
- throw;
- }
- }
- parse_helper(r+1, cols...);
- }
-
-
- public:
- template<class ...ColType>
- bool read_row(ColType& ...cols){
- static_assert(sizeof...(ColType)>=column_count,
- "not enough columns specified");
- static_assert(sizeof...(ColType)<=column_count,
- "too many columns specified");
- try{
- try{
-
- char*line;
- do{
- line = in.next_line();
- if(!line)
- return false;
- }while(comment_policy::is_comment(line));
-
- detail::parse_line<trim_policy, quote_policy>
- (line, row, col_order);
-
- parse_helper(0, cols...);
- }catch(error::with_file_name&err){
- err.set_file_name(in.get_truncated_file_name());
- throw;
- }
- }catch(error::with_file_line&err){
- err.set_file_line(in.get_file_line());
- throw;
- }
-
- return true;
- }
- };
+ template<class ...ColNames> // Reads the header line and lets parse_header_line fill col_order from it.
+ void read_header(ignore_column ignore_policy, ColNames...cols){
+ static_assert(sizeof...(ColNames)>=column_count, "not enough column names specified"); // together the two asserts
+ static_assert(sizeof...(ColNames)<=column_count, "too many column names specified"); // require exactly column_count names
+ try{
+ set_column_names(std::forward<ColNames>(cols)...);
+
+ char*line;
+ do{ // skip lines that comment_policy classifies as comments
+ line = in.next_line();
+ if(!line) // input ended before any header line was found
+ throw error::header_missing();
+ }while(comment_policy::is_comment(line));
+
+ detail::parse_header_line
+ <column_count, trim_policy, quote_policy>
+ (line, col_order, column_names, ignore_policy);
+ }catch(error::with_file_name&err){ // attach the file name to the error, then rethrow it
+ err.set_file_name(in.get_truncated_file_name());
+ throw;
+ }
+ }
+
+ template<class ...ColNames> // Sets the column names without reading anything from the input.
+ void set_header(ColNames...cols){
+ static_assert(sizeof...(ColNames)>=column_count, // together the two asserts require exactly column_count names
+ "not enough column names specified");
+ static_assert(sizeof...(ColNames)<=column_count,
+ "too many column names specified");
+ set_column_names(std::forward<ColNames>(cols)...);
+ std::fill(row, row+column_count, nullptr); // drop column pointers from any previously read row
+ col_order.resize(column_count);
+ for(unsigned i=0; i<column_count; ++i)
+ col_order[i] = i; // back to the identity column order
+ }
+
+ bool has_column(const std::string&name) const { // True if the index of `name` within column_names occurs in col_order.
+ return col_order.end() != std::find(
+ col_order.begin(), col_order.end(),
+ std::find(std::begin(column_names), std::end(column_names), name) // inner find: position of name; end() if the name is unknown
+ - std::begin(column_names)); // converted to an index; an unknown name gives column_count
+ }
+
+ void set_file_name(const std::string&file_name){ // Forwards to the underlying LineReader `in`.
+ in.set_file_name(file_name);
+ }
+
+ void set_file_name(const char*file_name){ // C-string variant; also forwards to `in`.
+ in.set_file_name(file_name);
+ }
+
+ const char*get_truncated_file_name()const{ // Returns whatever `in` reports as its (truncated) file name.
+ return in.get_truncated_file_name();
+ }
+
+ void set_file_line(unsigned file_line){ // Forwards the line number to the underlying LineReader `in`.
+ in.set_file_line(file_line);
+ }
+
+ unsigned get_file_line()const{ // Returns the line number reported by `in`.
+ return in.get_file_line();
+ }
+
+ private:
+ void parse_helper(std::size_t){} // recursion terminator: no output arguments left
+
+ template<class T, class ...ColType> // Converts row[r] into t, then recurses for the remaining arguments.
+ void parse_helper(std::size_t r, T&t, ColType&...cols){
+ if(row[r]){ // a null entry is skipped and leaves t untouched
+ try{
+ try{
+ ::io::detail::parse<overflow_policy>(row[r], t);
+ }catch(error::with_column_content&err){ // attach the offending text, then rethrow
+ err.set_column_content(row[r]);
+ throw;
+ }
+ }catch(error::with_column_name&err){ // attach the column name, then rethrow
+ err.set_column_name(column_names[r].c_str());
+ throw;
+ }
+ }
+ parse_helper(r+1, cols...);
+ }
+
+
+ public:
+ template<class ...ColType> // Reads the next non-comment line and parses its columns into cols.
+ bool read_row(ColType& ...cols){ // Returns false once the input is exhausted, true otherwise.
+ static_assert(sizeof...(ColType)>=column_count, // together the two asserts require exactly column_count arguments
+ "not enough columns specified");
+ static_assert(sizeof...(ColType)<=column_count,
+ "too many columns specified");
+ try{
+ try{
+
+ char*line;
+ do{ // skip lines that comment_policy classifies as comments
+ line = in.next_line();
+ if(!line) // no more lines
+ return false;
+ }while(comment_policy::is_comment(line));
+
+ detail::parse_line<trim_policy, quote_policy>
+ (line, row, col_order);
+
+ parse_helper(0, cols...); // convert the column strings now referenced by row
+ }catch(error::with_file_name&err){ // attach the file name, then rethrow
+ err.set_file_name(in.get_truncated_file_name());
+ throw;
+ }
+ }catch(error::with_file_line&err){ // attach the line number, then rethrow
+ err.set_file_line(in.get_file_line());
+ throw;
+ }
+
+ return true;
+ }
+ };
}
#endif
diff --git a/debian/README.source b/debian/README.source
deleted file mode 100644
index efd5624..0000000
--- a/debian/README.source
+++ /dev/null
@@ -1,12 +0,0 @@
-fccp for Debian
----------------
-
-This header-only library has no upstream tarball.
-It was downloaded and packed via debian/rule get-orig-sources.
-
-The documentation is a modified site from the wiki, installed in debian/doc.
-The pdf file was generated with "wkhtmltopdf Documentation.html Documentation.pdf".
-The batch is also in debian/doc.
-
- -- Jörg Frings-Fürst <debian@jff-webhosting.net> Fri, 08 Aug 2014 13:22:16 +0200
-
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 6f195be..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,20 +0,0 @@
-fast-cpp-csv-parser (0.0+git20140429~0a2590-2) UNRELEASED; urgency=medium
-
- * debian/control:
- - Move homepage to github.
- - Bump Standards-Version to 3.9.8 (no changes required).
- - Change VCS-Browser to secure uri.
- * debian/copyright:
- - Move homepage to github.
- * Remove debian/source/options because compression xz is standard now.
- * Bump compat level to 10 (no changes required).
- * debian/rules:
- - Change get-orig-source link to github.
-
- -- Jörg Frings-Fürst <debian@jff-webhosting.net> Sun, 11 Sep 2016 14:44:39 +0200
-
-fast-cpp-csv-parser (0.0+git20140429~0a2590-1) unstable; urgency=low
-
- * Initial release (Closes: #745898)
-
- -- Jörg Frings-Fürst <debian@jff-webhosting.net> Fri, 15 Aug 2014 16:06:33 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index f599e28..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-10
diff --git a/debian/control b/debian/control
deleted file mode 100644
index badcef1..0000000
--- a/debian/control
+++ /dev/null
@@ -1,31 +0,0 @@
-Source: fast-cpp-csv-parser
-Section: libdevel
-Priority: optional
-Maintainer: Jörg Frings-Fürst <debian@jff-webhosting.net>
-Build-Depends: debhelper (>= 9)
-Standards-Version: 3.9.8
-Homepage: https://github.com/ben-strasser/fast-cpp-csv-parser
-Vcs-Git: git://anonscm.debian.org/collab-maint/fast-cpp-csv-parser.git
-Vcs-Browser: https://anonscm.debian.org/cgit/collab-maint/fast-cpp-csv-parser.git
-
-Package: libfccp-dev
-Architecture: all
-Depends: ${misc:Depends}
-Description: Fast C++ CSV Parser
- fast-cpp-cvs-praser is a small, easy-to-use and fast header-only
- library for reading comma separated value (CSV) files. The library
- is completely contained inside a single header file. The library
- can used with a standard conformant C++11 compiler.
- .
- Feature list:
- * Automatically rearranges columns by parsing the header line.
- * Disk I/O and CSV-parsing are overlapped using threads for efficiency.
- * Parsing features such as escaped strings can be enabled and disabled
- at compile time using templates.
- * Can read multiple GB files in reasonable time.
- * Support for custom columns separators (i.e. Tab separated value files
- are supported), quote escaped strings, automatic space trimming.
- * Works with *nix and Windows newlines and automatically ignores UTF-8 BOMs.
- * Exception classes with enough context to format useful error messages.
- * what() returns error messages ready to be shown to a user.
-
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 5a0a5d3..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,58 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: fast-cpp-csv-parser
-Source: https://github.com/ben-strasser/fast-cpp-csv-parser
-
-Files: *
-Copyright: 2012-2014 Ben Strasser <code@ben-strasser.net>
-License: BSD-3
-
-Files: debian/*
-Copyright: 2014 Jörg Frings-Fürst <debian@jff-webhosting.net>
-License: GPL-3+
-
-License: BSD-3
- All rights reserved.
- .
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- .
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- .
- 2. Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- .
- 3. Neither the name of the copyright holder nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
- .
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
-
-License: GPL-3+
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- .
- This package is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- .
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- .
- On Debian systems, the complete text of the GNU General
- Public License version 3 can be found in "/usr/share/common-licenses/GPL-3".
-
diff --git a/debian/doc-base b/debian/doc-base
deleted file mode 100644
index 1128371..0000000
--- a/debian/doc-base
+++ /dev/null
@@ -1,13 +0,0 @@
-Document: fast-cpp-csv-parser
-Title: Debian fccp Manual
-Author: Ben Strasser
-Abstract: This manual describes what fast-cpp-csv-parser is
- and how it can be used.
-Section: Programming/C++
-
-Format: PDF
-Files: /usr/share/doc/libfccp-dev/Documentation.pdf.gz
-
-Format: html
-Index: /usr/share/doc/libfccp-dev/Documentation.html
-Files: /usr/share/doc/libfccp-dev/*.html
diff --git a/debian/doc/Documentation.html b/debian/doc/Documentation.html
deleted file mode 100644
index 9c84a51..0000000
--- a/debian/doc/Documentation.html
+++ /dev/null
@@ -1,89 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >
- <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" >
-
-<title>Documentation - fast-cpp-csv-parser - Fast C++ CSV Parser
-</title>
-
-</head>
-
- <div id="wikicontent">
- <div class="vt" id="wikimaincol">
- <h1><b>Documentation - fast-cpp-csv-parser</b></h1>
- <p>The library provides two classes: </p><ul><li><tt>LineReader</tt>: A class to efficiently read large files line by line. </li><li><tt>CSVReader</tt>: A class that efficiently reads large CSV files. </li></ul><p>Note that everything is contained in the <tt>io</tt> namespace. </p><h1><a name="LineReader"></a><tt>LineReader</tt><a href="#LineReader" class="section_anchor"></a></h1><pre class="prettyprint">class LineReader{
-public:
- // Constructors
- LineReader(some_string_type file_name);
- LineReader(some_string_type file_name, std::FILE*file);
-
- // Reading
- char*next_line();
-
- // File Location
- void set_file_line(unsigned);
- unsigned get_file_line()const;
- void set_file_name(some_string_type file_name);
- const char*get_truncated_file_name()const;
-};</pre><p>The constructor takes a file name and optionally a <tt>stdio.h</tt> file handle. If no file handle is provided the class tries to open the file and throws an <tt>error::can_not_open_file</tt> exception on failure. If a file handle is provided then the file name is only used to format error messages. The library will call <tt>std::fclose</tt> on the file handle. <tt>some_string_type</tt> can be a <tt>std::string</tt> or a <tt>char*</tt>. </p><p>Lines are read by calling the <tt>next_line</tt> function. It returns a pointer to a null terminated C-string that contains the line. If the end of file is reached a null pointer is returned. The newline character is not included in the string. You may modify the string as long as you do not write past the null terminator. The string stays valid until the destructor is called or until <tt>next_line</tt> is called again. Windows and <tt>*</tt>nix newlines are handled transparently. UTF-8 BOMs are automatically ignored and missing newlines at the end of the file are no problem. </p><p><strong>Important:</strong> There is a limit of 2^24-1 characters per line. If this limit is exceeded an <tt>error::line_length_limit_exceeded</tt> exception is thrown. </p><p>Looping over all the lines in a file can be done in the following way. </p><pre class="prettyprint">LineReader in(...);
-while(char*line = in.next_line()){
- ...
-}</pre><p>The remaining functions are mainly used to format error messages. The file line indicates the current position in the file, i.e., after the first <tt>next_line</tt> call it is 1 and after the second 2. Before the first call it is 0. The file name is truncated as internally C-strings are used to avoid <tt>std::bad_alloc</tt> exceptions during error reporting. </p><p><strong>Note:</strong> It is not possible to exchange the line termination character. </p><h1><a name="CSVReader"></a><tt>CSVReader</tt><a href="#CSVReader" class="section_anchor"></a></h1><p><tt>CSVReader</tt> uses policies. These are classes with only static members to allow core functionality to be exchanged in an efficient way. </p><pre class="prettyprint">template&lt;
- unsigned column_count,
- class trim_policy = trim_chars&lt;&#x27; &#x27;, &#x27;\t&#x27;&gt;,
- class quote_policy = no_quote_escape&lt;&#x27;,&#x27;&gt;,
- class overflow_policy = throw_on_overflow,
- class comment_policy = no_comment
-&gt;
-class CSVReader{
-public:
- // Constructors
- CSVReader(some_string_type file_name);
- CSVReader(some_string_type file_name, std::FILE*file);
-
- // Parsing Header
- void read_header(ignore_column ignore_policy, some_string_type col_name1, some_string_type col_name2, ...);
- void set_header(some_string_type col_name1, some_string_type col_name2, ...);
- bool has_column(some_string_type col_name)const;
-
- // Read
- bool read_row(ColType1&amp;col1, ColType2&amp;col2, ...);
-
- // File Location
- void set_file_line(unsigned);
- unsigned get_file_line()const;
- void set_file_name(some_string_type file_name);
- const char*get_truncated_file_name()const;
-};</pre><p>The <tt>column_count</tt> template parameter indicates how many columns you want to read from the CSV file. This does not necessarily have to coincide with the actual number of columns in the file. The four policies govern various aspects of the parsing. </p><p>The trim policy indicates what characters should be ignored at the beginning and the end of every column. The default ignores spaces and tabs. This makes sure that </p><pre class="prettyprint">a,b,c
-1,2,3</pre><p>is interpreted in the same way as </p><pre class="prettyprint"> a, b, c
-1 , 2, 3</pre><p>The trim_chars can take any number of template parameters. For example <tt>trim_chars&lt;&#x27; &#x27;, &#x27;\t&#x27;, &#x27;_&#x27;&gt; </tt>is also valid. If no character should be trimmed use <tt>trim_chars&lt;&gt;</tt>. </p><p>The quote policy indicates how strings should be escaped. It also specifies the column separator. The predefined policies are: </p><ul><li><tt>no_quote_escape&lt;sep&gt;</tt> : Strings are not escaped. &quot;<tt>sep</tt>&quot; is used as column separator. </li><li><tt>double_quote_escape&lt;sep, quote&gt;</tt> : Strings are escaped using quotes. Quotes are escaped using two consecutive quotes. &quot;<tt>sep</tt>&quot; is used as column separator and &quot;<tt>quote</tt>&quot; as quoting character. </li></ul><p><strong>Important</strong>: Quoting can be quite expensive. Disable it if you do not need it. </p><p>The overflow policy indicates what should be done if the integers in the input are too large to fit into the variables. The following policies are predefined: </p><ul><li><tt>throw_on_overflow</tt> : Throw an <tt>error::integer_overflow</tt> or <tt>error::integer_underflow</tt> exception. </li><li><tt>ignore_overflow</tt> : Do nothing and let the overflow happen. </li><li><tt>set_to_max_on_overflow</tt> : Set the value to <tt>numeric_limits&lt;...&gt;::max()</tt> (or to the corresponding <tt>min()</tt> on underflow). </li></ul><p>The comment policy allows skipping lines based on some criteria. Valid predefined policies are: </p><ul><li><tt>no_comment</tt> : Do not ignore any line. </li><li><tt>empty_line_comment</tt> : Ignore all lines that are empty or only contain spaces and tabs. </li><li><tt>single_line_comment&lt;com1, com2, ...&gt;</tt> : Ignore all lines that start with com1 or com2 or ... as the first character. There may not be any space between the beginning of the line and the comment character. </li><li><tt>single_and_empty_line_comment&lt;com1, com2, ...&gt;</tt> : Ignore all empty lines and single line comments. 
 </li></ul><p>Examples: </p><ul><li><tt>CSVReader&lt;4, trim_chars&lt;&#x27; &#x27;&gt;, double_quote_escape&lt;&#x27;,&#x27;,&#x27;\&quot;&#x27;&gt; &gt;</tt> reads 4 columns from a normal CSV file with string escaping enabled. </li><li><tt>CSVReader&lt;3, trim_chars&lt;&#x27; &#x27;&gt;, no_quote_escape&lt;&#x27;\t&#x27;&gt;, throw_on_overflow, single_line_comment&lt;&#x27;#&#x27;&gt; &gt;</tt> reads 3 columns from a tab separated file with string escaping disabled. Lines starting with a # are ignored. </li></ul><p>The constructors and the file location functions are exactly the same as for <tt>LineReader</tt>. See its documentation for details. </p><p>There are three methods that deal with headers. The <tt>read_header</tt> method reads a line from the file and rearranges the columns to match that order. It also checks whether all necessary columns are present. The <tt>set_header</tt> method does <strong>not</strong> read any input. Use it if the file does not have any header. Obviously it is impossible to rearrange columns or check for their availability when using it. The order in the file and in the program must match when using <tt>set_header</tt>. The <tt>has_column</tt> method checks whether a column is present in the file. The first argument of <tt>read_header</tt> is a bitfield that determines how the function should react to column mismatches. The default behavior is to throw an <tt>error::extra_column_in_header</tt> exception if the file contains more columns than expected and an <tt>error::missing_column_in_header</tt> when there are not enough. This behavior can be altered using the following flags. </p><ul><li><tt>ignore_no_column</tt>: The default behavior, no flags are set </li><li><tt>ignore_extra_column</tt>: If a column with a name is in the file but not in the argument list, then it is silently ignored. 
 </li><li><tt>ignore_missing_column</tt>: If a column with a name is not in the file but is in the argument list, then <tt>read_row</tt> will not modify the corresponding variable. </li></ul><p>When using <tt>ignore_missing_column</tt> it is a good idea to initialize the variables passed to <tt>read_row</tt> with a default value, for example: </p><pre class="prettyprint">// The file only contains column &quot;a&quot;
-CSVReader&lt;2&gt;in(...);
-in.read_header(ignore_missing_column, &quot;a&quot;, &quot;b&quot;);
-int a,b = 42;
-while(in.read_row(a,b)){
- // a contains the value from the file
- // b is left unchanged by read_row, i.e., it is 42
-}</pre><p>If only some columns are optional or their default value depends on other columns you have to use <tt>has_column</tt>, for example: </p><pre class="prettyprint">// The file only contains the columns &quot;a&quot; and &quot;b&quot;
-CSVReader&lt;3&gt;in(...);
-in.read_header(ignore_missing_column, &quot;a&quot;, &quot;b&quot;, &quot;sum&quot;);
-if(!in.has_column(&quot;a&quot;) || !in.has_column(&quot;b&quot;))
- throw my_neat_error_class();
-bool has_sum = in.has_column(&quot;sum&quot;);
-int a,b,sum;
-while(in.read_row(a,b,sum)){
- if(!has_sum)
- sum = a+b;
-}</pre><p><strong>Important</strong>: Do not call <tt>has_column</tt> from within the read-loop. It would work correctly but significantly slow down processing. </p><p>If two columns have the same name an <tt>error::duplicated_column_in_header</tt> exception is thrown. If <tt>read_header</tt> is called but the file is empty an <tt>error::header_missing</tt> exception is thrown. </p><p>The <tt>read_row</tt> function reads a line, splits it into the columns and arranges them correctly. It trims the entries and unescapes them. If requested the content is interpreted as integer or as floating point. The variables passed to read_row may be of the following types. </p><ul><li>builtin signed integer: These are <tt>signed char</tt>, <tt>short</tt>, <tt>int</tt>, <tt>long</tt> and <tt>long long</tt>. The input must be encoded as a base 10 ASCII number optionally preceded by a + or -. The function detects whether the integer would overflow (or underflow) and behaves as indicated by overflow_policy. </li><li>builtin unsigned integer: Just as the signed counterparts except that a leading + or - is not allowed. </li><li>builtin floating point: These are <tt>float</tt>, <tt>double</tt> and <tt>long double</tt>. The input may have a leading + or -. The number must be base 10 encoded. The decimal point may either be a dot or a comma. (Note that a comma will only work if it is not also used as column separator or the number is escaped.) A base 10 exponent may be specified using the &quot;1e10&quot; syntax. The &quot;e&quot; may be lower- or uppercase. Examples for valid floating points are &quot;1&quot;, &quot;-42.42&quot; and &quot;+123.456E789&quot;. The input is rounded to the next floating point or infinity if it is too large or small. </li><li><tt>char</tt>: The column content must be a single character. </li><li><tt>std::string</tt>: The column content is assigned to the string. The std::string is filled with the trimmed and unescaped version. 
 </li><li><tt>char*</tt>: A pointer directly into the buffer. The string is trimmed and unescaped and null terminated. This pointer stays valid until read_row is called again or the CSVReader is destroyed. Use this for user defined types. </li></ul><p>Note that there is no inherent overhead to using <tt>char*</tt> and then interpreting it compared to using one of the parsers directly built into <tt>CSVReader</tt>. The builtin number parsers are pure convenience. If you need a slightly different syntax then use <tt>char*</tt> and do the parsing yourself. </p>
- </div>
- </div>
- </td><tr>
-</table>
- </div>
-
-
-
- </body>
-</html>
-
-
diff --git a/debian/doc/Documentation.pdf b/debian/doc/Documentation.pdf
deleted file mode 100644
index bb78956..0000000
--- a/debian/doc/Documentation.pdf
+++ /dev/null
Binary files differ
diff --git a/debian/doc/convert.sh b/debian/doc/convert.sh
deleted file mode 100755
index f8544ed..0000000
--- a/debian/doc/convert.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-
-wkhtmltopdf Documentation.html Documentation.pdf
diff --git a/debian/docs b/debian/docs
deleted file mode 100644
index 4b24dc5..0000000
--- a/debian/docs
+++ /dev/null
@@ -1,2 +0,0 @@
-debian/doc/Documentation.html
-debian/doc/Documentation.pdf
diff --git a/debian/install b/debian/install
deleted file mode 100644
index a1b230e..0000000
--- a/debian/install
+++ /dev/null
@@ -1 +0,0 @@
-csv.h usr/include/libfccp/
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 0902082..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/make -f
-
-# Uncomment this to turn on verbose mode.
-#export DH_VERBOSE=1
-
-PKD = $(abspath $(dir $(MAKEFILE_LIST)))
-PKG = $(word 2,$(shell dpkg-parsechangelog -l$(PKD)/changelog | grep ^Source))
-VER = $(shell dpkg-parsechangelog -l$(PKD)/changelog -SVersion | cut -d- -f1)
-
-%:
- dh $@
-
-override_dh_auto_build:
-
-get-orig-source: $(info I: $(PKG)_$(VER))
- cd ${CURDIR}
- wget https://github.com/ben-strasser/fast-cpp-csv-parser/blob/master/csv.h -O csv.h
- tar -cJf ../$(PKG)_$(VER).orig.tar.xz *.h
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/source/include-binaries b/debian/source/include-binaries
deleted file mode 100644
index 8fecf38..0000000
--- a/debian/source/include-binaries
+++ /dev/null
@@ -1 +0,0 @@
-debian/doc/Documentation.pdf
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 32dadcb..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,7 +0,0 @@
-version=3
-
-#
-# This package needs only one file from a git repository.
-# The filename carries no release information.
-# So this watch file is not used.
-# \ No newline at end of file