00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifdef CWDEBUG
00025 #define BOOST_SPIRIT_DEBUG
00026 #endif
00027
00028 #ifndef USE_PCH
00029 #include "sys.h"
00030 #include <cstring>
00031 #include <ctime>
00032 #include <iomanip>
00033 #include "debug.h"
00034 #include <glib/gmacros.h>
00035 #ifdef CWDEBUG
00036 #include <libcwd/buf2str.h>
00037 #endif
00038 #endif
00039
00040 #include "PgnDatabase.h"
00041 #include "PgnGrammar.h"
00042 #include "chattr.h"
00043 #include "Color.h"
00044
00045 namespace cwchess {
00046 namespace pgn {
00047
00048 void Database::process_next_data_block(char const* data, size_t size)
00049 {
00050 }
00051
00052 void DatabaseSeekable::load(void)
00053 {
00054 if (!Glib::thread_supported())
00055 DoutFatal(dc::fatal, "DatabaseSeekable::load: Threading not initialized. Call Glib::init_thread() at the start of main().");
00056 M_file->read_async(sigc::mem_fun(this,& DatabaseSeekable::read_async_open_ready), M_cancellable);
00057 }
00058
00059 void DatabaseSeekable::read_async_open_ready(Glib::RefPtr<Gio::AsyncResult>& result)
00060 {
00061 M_file_input_stream = M_file->read_finish(result);
00062 M_buffer = new MemoryBlockList(sigc::mem_fun(*this,& DatabaseSeekable::need_more_data));
00063 M_read_thread = Glib::Thread::create(sigc::mem_fun(*this,& DatabaseSeekable::read_thread), false);
00064 M_new_block = MemoryBlockNode::create(S_buffer_size);
00065
00066 GInputStream* stream = M_file_input_stream->InputStream::gobj();
00067 g_input_stream_read_async(stream, M_new_block->block_begin(), S_buffer_size,
00068 G_PRIORITY_DEFAULT, M_cancellable->gobj(),& DatabaseSeekable::read_async_ready, this);
00069 M_processing_finished.connect(sigc::mem_fun(*this,& DatabaseSeekable::processing_finished));
00070 }
00071
00072 void DatabaseSeekable::need_more_data(void)
00073 {
00074 M_new_block = MemoryBlockNode::create(S_buffer_size);
00075 g_input_stream_read_async(M_file_input_stream->InputStream::gobj(), M_new_block->block_begin(), S_buffer_size,
00076 G_PRIORITY_DEFAULT, M_cancellable->gobj(),& DatabaseSeekable::read_async_ready, this);
00077 }
00078
00079
00080 inline void DatabaseSeekable::read_async_ready(GObject* source_object, GAsyncResult* async_res)
00081 {
00082 GInputStream* stream = M_file_input_stream->InputStream::gobj();
00083 GError* error = NULL;
00084 gssize len = g_input_stream_read_finish(stream, async_res,& error);
00085 if (len == -1)
00086 DoutFatal(dc::core, "read_finish() returned -1");
00087 if (len > 0)
00088 {
00089 M_bytes_read += len;
00090 Dout(dc::notice, "Appending a block with " << len << " bytes to the buffer.");
00091
00092
00093 M_buffer->append(M_new_block, len);
00094 }
00095 else
00096 {
00097 Dout(dc::notice, "g_input_stream_read_finish() returned 0. Closing buffer.");
00098
00099
00100
00101
00102 M_buffer->close();
00103 }
00104 }
00105
00106 void DatabaseSeekable::read_async_ready(GObject* source_object, GAsyncResult* async_res, gpointer user_data)
00107 {
00108 DatabaseSeekable* database_seekable = reinterpret_cast<DatabaseSeekable*>(user_data);
00109 database_seekable->read_async_ready(source_object, async_res);
00110 }
00111
00112 DatabaseSeekable::~DatabaseSeekable()
00113 {
00114
00115 if (M_buffer)
00116 delete M_buffer;
00117 }
00118
00119 namespace {
00120
// Subtract t2 from t1 in place and return t1.
// When the nanosecond difference becomes negative, one second is borrowed
// so that tv_nsec ends up non-negative again.
timespec& operator-=(timespec& t1, timespec const& t2)
{
  long const nanoseconds_per_second = 1000000000L;

  t1.tv_sec -= t2.tv_sec;
  t1.tv_nsec -= t2.tv_nsec;

  bool const need_borrow = t1.tv_nsec < 0;
  if (need_borrow)
  {
    t1.tv_sec -= 1;
    t1.tv_nsec += nanoseconds_per_second;
  }
  return t1;
}
00132
// Add t2 to t1 in place and return t1.
// When the nanosecond sum reaches a full second, that second is carried
// over into tv_sec.
timespec& operator+=(timespec& t1, timespec const& t2)
{
  long const nanoseconds_per_second = 1000000000L;

  t1.tv_sec += t2.tv_sec;
  t1.tv_nsec += t2.tv_nsec;

  bool const need_carry = t1.tv_nsec >= nanoseconds_per_second;
  if (need_carry)
  {
    t1.tv_sec += 1;
    t1.tv_nsec -= nanoseconds_per_second;
  }
  return t1;
}
00144
// Write t1 as "<seconds>.<nanoseconds>", the nanoseconds being zero-padded
// to nine digits.
//
// The fill character of a stream is sticky, so the caller's fill character
// is saved and restored; otherwise every later width-padded output on the
// same stream would silently be padded with '0'.
std::ostream& operator<<(std::ostream& os, timespec const& t1)
{
  os << t1.tv_sec << '.';
  char const saved_fill = os.fill('0');
  os << std::setw(9) << t1.tv_nsec;
  os.fill(saved_fill);
  return os;
}
00149
00150 }
00151
00152
00153
00154
00155
00156
00157
00158 #define DEBUG_PARSER 0
00159
00160
00161 template<class ForwardIterator>
00162 struct ScannerData {
00163 ForwardIterator* M_iter;
00164 unsigned int M_line;
00165 unsigned int M_column;
00166 unsigned int M_number_of_characters;
00167 Color M_to_move;
00168 #if DEBUG_PARSER
00169 ForwardIterator M_line_start;
00170
00171 ScannerData(ForwardIterator* iter) : M_iter(iter), M_line(1), M_column(0), M_number_of_characters(0), M_line_start(iter->buffer()) { }
00172 #else
00173 void init(ForwardIterator* iter)
00174 {
00175 M_iter = iter;
00176 M_line = 1;
00177 M_column = 0;
00178 M_number_of_characters = 0;
00179 }
00180 #endif
00181 };
00182
// Exception type signalling that the scanner reached the end of its input.
class EndOfFileReached : public std::exception
{
};
00185
// Exception type thrown by the movetext decoders when parsing fails.
class ParseError : public std::exception
{
};
00188
00189 static EndOfFileReached const end_of_file_reached;
00190
00191
00192 template<class ForwardIterator>
00193 class Scanner {
00194 private:
00195 ScannerData<ForwardIterator> M_current_position;
00196 ForwardIterator const M_end;
00197 std::vector<ScannerData<ForwardIterator> > M_stack;
00198 int M_stack_index;
00199 public:
00200
00201
00202
00203
00204 Scanner(ForwardIterator& iter, ForwardIterator const end) :
00205 #if DEBUG_PARSER
00206 M_current_position(&iter),
00207 #endif
00208 M_end(end), M_stack_index(0)
00209 #if DEBUG_PARSER
00210 { }
00211 #else
00212 { M_current_position.init(&iter); }
00213 #endif
00214
00215 int push_position(void)
00216 {
00217 size_t size = M_stack.size();
00218 if (G_UNLIKELY(size <= M_stack_index))
00219 M_stack.push_back(M_current_position);
00220 else
00221 M_stack[M_stack_index] = M_current_position;
00222 return M_stack_index++;
00223 }
00224
00225 void pop_position(int index)
00226 {
00227 M_current_position = M_stack[index];
00228 M_stack_index = index;
00229 }
00230
00231 #if DEBUG_PARSER
00232
00233
00234
00235
00236 void print_line(void)
00237 {
00238 std::string s(M_current_position.M_line_start,* M_current_position.M_iter);
00239 Dout(dc::parser, "Parsed: \"" << buf2str(s.data(), s.length()) << "\".");
00240 }
00241 #endif
00242
00243
00244
00245
00246
00247
00248
00249
00250 typename ForwardIterator::value_type first_character(void) throw(EndOfFileReached)
00251 {
00252 if (G_UNLIKELY(*M_current_position.M_iter == M_end))
00253 throw end_of_file_reached;
00254 M_current_position.M_number_of_characters = 1;
00255 #if DEBUG_PARSER
00256 M_current_position.M_line_start =* M_current_position.M_iter;
00257 #endif
00258 return** M_current_position.M_iter;
00259 }
00260
00261
00262
00263
00264 typename ForwardIterator::value_type next_character(void)
00265 {
00266 if (G_UNLIKELY(++*M_current_position.M_iter == M_end))
00267 {
00268 #if DEBUG_PARSER
00269 print_line();
00270 #endif
00271
00272 }
00273 ++M_current_position.M_column;
00274 return** M_current_position.M_iter;
00275 }
00276
00277
00278
00279
00280
00281
00282
00283 void eat_white_space(typename ForwardIterator::value_type& current_character)
00284 {
00285 while (is_white_space(current_character))
00286 {
00287 if (G_UNLIKELY(is_eol(current_character)))
00288 eat_eol(current_character);
00289 else
00290 current_character = next_character();
00291 }
00292 }
00293
00294
00295 void eat_line(typename ForwardIterator::value_type& current_character)
00296 {
00297 while (!is_eol(current_character))
00298 current_character = next_character();
00299 }
00300
00301
00302 bool parse_char(typename ForwardIterator::value_type& current_character, char literal)
00303 {
00304 current_character = next_character();
00305 if (current_character != literal)
00306 return false;
00307 current_character = next_character();
00308 return true;
00309 }
00310
00311
00312 bool parse_str(typename ForwardIterator::value_type& current_character, char const* literal)
00313 {
00314 current_character = next_character();
00315 for (char const* p = literal;* p; ++p)
00316 {
00317 if (current_character !=* p)
00318 return false;
00319 current_character = next_character();
00320 }
00321 return true;
00322 }
00323
00324
00325
00326
00327
00328
00329
00330 bool eat_comment(typename ForwardIterator::value_type& current_character)
00331 {
00332 #if DEBUG_PARSER
00333 assert(!is_white_space(current_character));
00334 #endif
00335 if (G_UNLIKELY(is_comment_start(current_character)))
00336 {
00337 if (current_character == '{')
00338 {
00339 while (current_character != '}')
00340 current_character = next_character();
00341 current_character = next_character();
00342 }
00343 else
00344 {
00345 eat_line(current_character);
00346 eat_eol(current_character);
00347 }
00348 return true;
00349 }
00350 return false;
00351 }
00352
00353
00354 void eat_white_space_and_comments(typename ForwardIterator::value_type& current_character)
00355 {
00356 eat_white_space(current_character);
00357 while(eat_comment(current_character))
00358 eat_white_space(current_character);
00359 }
00360
00361
00362
00363
00364
00365
00366
00367
00368 bool eat_eol(typename ForwardIterator::value_type& current_character)
00369 {
00370 #if DEBUG_PARSER
00371 assert(is_eol(current_character));
00372 #endif
00373 unsigned int line = M_current_position.M_line + 1;
00374 do
00375 {
00376 ++M_current_position.M_line;
00377 bool saw_carriage_return = (current_character == '\r');
00378 current_character = next_character();
00379 if (saw_carriage_return && current_character == '\n')
00380 current_character = next_character();
00381 if (current_character == '%')
00382 {
00383 eat_line(current_character);
00384 ++line;
00385 }
00386 }
00387 while (is_eol(current_character));
00388 #if DEBUG_PARSER
00389 print_line();
00390 M_current_position.M_line_start =* M_current_position.M_iter;
00391 #endif
00392 M_current_position.M_number_of_characters += M_current_position.M_column;
00393 M_current_position.M_column = 0;
00394 return M_current_position.M_line > line;
00395 }
00396
00397
00398
00399
00400
00401
00402 void decode_string(typename ForwardIterator::value_type& current_character)
00403 {
00404 do
00405 {
00406 current_character = next_character();
00407 }
00408 while(current_character != '"');
00409
00410 current_character = next_character();
00411 }
00412
00413
00414 unsigned int line(void) const { return M_current_position.M_line; }
00415
00416 unsigned int column(void) const { return M_current_position.M_column + 1; }
00417
00418
00419
00420 unsigned int number_of_characters(void) const { return M_current_position.M_number_of_characters + M_current_position.M_column; }
00421
00422
00423 Color to_move(void) const { return M_current_position.M_to_move; }
00424
00425
00426 void reset_game_state(void)
00427 {
00428 M_current_position.M_to_move = white;
00429 }
00430
00431 #ifdef CWDEBUG
00432 template<typename T>
00433 friend std::ostream& operator<<(std::ostream& os, Scanner<T> const& scanner);
00434 #endif
00435 };
00436
00437 #ifdef CWDEBUG
00438
00439 template<typename T>
00440 std::ostream& operator<<(std::ostream& os, Scanner<T> const& const_scanner)
00441 {
00442 Scanner<T> scanner(const_scanner);
00443 if (*scanner.M_current_position.M_iter == scanner.M_end)
00444 os << "<EOF>";
00445 else
00446 {
00447 char c =** scanner.M_current_position.M_iter;
00448 try
00449 {
00450 do
00451 {
00452 os << libcwd::char2str(c);
00453 }
00454 while (!is_eol(c = scanner.next_character()));
00455 }
00456 catch(EndOfFileReached&)
00457 {
00458 os << "<EOF>";
00459 }
00460 }
00461 return os;
00462 }
00463 #endif
00464
00465 typedef Scanner<MemoryBlockList::iterator> scanner_t;
00466
00467 namespace {
00468
00469
00470
00471
00472
00473
00474
00475 inline bool decode_tagname(char& c, scanner_t& scanner)
00476 {
00477 if (G_UNLIKELY(!is_tagname_begin(c)))
00478 return false;
00479 while(is_tagname_continuation(c))
00480 c = scanner.next_character();
00481 return true;
00482 }
00483
00484
00485
00486
00487
00488
00489 inline bool correct_string(char& c, scanner_t& scanner)
00490 {
00491 if (c != '"')
00492 return false;
00493
00494 c = scanner.next_character();
00495
00496 while(!is_quote_or_eol(c))
00497 c = scanner.next_character();
00498
00499 if (c != '"')
00500 return false;
00501
00502 c = scanner.next_character();
00503 return true;
00504 }
00505
00506
00507
00508
00509
00510 inline bool correct_tag_pair(char& c, scanner_t& scanner)
00511 {
00512 #if DEBUG_PARSER
00513 assert(c == '[');
00514 #endif
00515
00516 c = scanner.next_character();
00517 scanner.eat_white_space(c);
00518 if (G_UNLIKELY(!decode_tagname(c, scanner)))
00519 return false;
00520 scanner.eat_white_space(c);
00521 if (G_UNLIKELY(!correct_string(c, scanner)))
00522 return false;
00523 scanner.eat_white_space(c);
00524 if (G_UNLIKELY(c != ']'))
00525 return false;
00526
00527 c = scanner.next_character();
00528 return true;
00529 }
00530
00531 inline bool tag_pair(char& c, scanner_t& scanner)
00532 {
00533 #if DEBUG_PARSER
00534 assert(c == '[');
00535 #endif
00536
00537 c = scanner.next_character();
00538 scanner.eat_white_space_and_comments(c);
00539 if (G_UNLIKELY(!decode_tagname(c, scanner)))
00540 return false;
00541 scanner.eat_white_space_and_comments(c);
00542 if (G_UNLIKELY(is_tag_separator_junk(c)))
00543 {
00544
00545 c = scanner.next_character();
00546 scanner.eat_white_space_and_comments(c);
00547 }
00548 if (G_UNLIKELY(c != '"'))
00549 return false;
00550 scanner.decode_string(c);
00551 scanner.eat_white_space_and_comments(c);
00552 if (G_UNLIKELY(c != ']'))
00553 return false;
00554
00555 c = scanner.next_character();
00556 return true;
00557 }
00558
00559 bool decode_movetext_section_white(char& c, scanner_t& scanner) throw(ParseError)
00560 {
00561 throw ParseError();
00562 }
00563
00564 bool decode_movetext_section_black(char& c, scanner_t& scanner) throw(ParseError)
00565 {
00566 throw ParseError();
00567 }
00568
00569 bool decode_movetext_section(char& c, scanner_t& scanner)
00570 {
00571 if (scanner.to_move() == white)
00572 return decode_movetext_section_white(c, scanner);
00573 else
00574 return decode_movetext_section_black(c, scanner);
00575 }
00576
00577 bool decode_game_termination(char& c, scanner_t& scanner)
00578 {
00579 char d = c;
00580 c = scanner.next_character();
00581 if (d == '0' && c == '-')
00582 return scanner.parse_char(c, '1');
00583 else if (d == '1' && c == '/')
00584 return scanner.parse_str(c, "2-1/2");
00585 return d == '*';
00586 }
00587
00588 }
00589
00590 void DatabaseSeekable::read_thread(void)
00591 {
00592 Debug(debug::init_thread());
00593 Dout(dc::notice, "DatabaseSeekable::read_thread started.");
00594
00595 timespec start_time_real, end_time_real;
00596 timespec start_time_process, end_time_process;
00597 timespec start_time_thread, end_time_thread;
00598
00599 clock_gettime(CLOCK_REALTIME,& start_time_real);
00600 clock_gettime(CLOCK_PROCESS_CPUTIME_ID,& start_time_process);
00601 clock_gettime(CLOCK_THREAD_CPUTIME_ID,& start_time_thread);
00602
00603 scanner_t scanner(M_buffer->begin(), M_buffer->end());
00604
00605 try
00606 {
00607 int PGN_game_start;
00608 int PGN_movetext_section_start;
00609
00610 bool saw_empty_line = true;
00611
00612
00613 char c = scanner.first_character();
00614
00615
00616 for (;;)
00617 {
00618
00619 scanner.reset_game_state();
00620
00621 try
00622 {
00623
00624
00625
00626
00627
00628
00629 scanner.eat_white_space(c);
00630
00631 do
00632 {
00633 if (c == '[')
00634 {
00635 PGN_game_start = scanner.push_position();
00636
00637
00638
00639 if ((saw_empty_line && tag_pair(c, scanner)) ||
00640 (!saw_empty_line && correct_tag_pair(c, scanner)))
00641 {
00642
00643 Dout(dc::parser, "After first tag pair of PGN game: " << scanner.line() << ':' << scanner.column());
00644 break;
00645 }
00646 }
00647
00648 scanner.eat_line(c);
00649
00650 saw_empty_line = scanner.eat_eol(c);
00651 }
00652 while(1);
00653
00654
00655
00656
00657
00658
00659 scanner.eat_white_space_and_comments(c);
00660 while(c == '[')
00661 {
00662 if (G_UNLIKELY(!tag_pair(c, scanner)))
00663 break;
00664 scanner.eat_white_space_and_comments(c);
00665 }
00666
00667
00668 if (c == '1')
00669 {
00670
00671
00672 PGN_movetext_section_start = scanner.push_position();
00673 }
00674 if (!decode_movetext_section(c, scanner))
00675 {
00676
00677 scanner.pop_position(PGN_movetext_section_start);
00678 c = '1';
00679 }
00680
00681 if (decode_game_termination(c, scanner))
00682 {
00683
00684 scanner.eat_white_space_and_comments(c);
00685
00686
00687
00688 saw_empty_line = true;
00689 continue;
00690 }
00691
00692
00693 Dout(dc::parser, "Parsing stopped at " << scanner.line() << ':' << scanner.column() << " at \"" << scanner << "\".");
00694 }
00695 catch(ParseError&)
00696 {
00697
00698 continue;
00699 }
00700 break;
00701 }
00702 }
00703 catch(EndOfFileReached&)
00704 {
00705 }
00706
00707 clock_gettime(CLOCK_REALTIME,& end_time_real);
00708 clock_gettime(CLOCK_PROCESS_CPUTIME_ID,& end_time_process);
00709 clock_gettime(CLOCK_THREAD_CPUTIME_ID,& end_time_thread);
00710
00711 end_time_real -= start_time_real;
00712 end_time_process -= start_time_process;
00713 end_time_thread -= start_time_thread;
00714
00715 #if 0
00716 if (!info.hit)
00717 {
00718 std::cout << "Failure to parse anything." << std::endl;
00719 }
00720 else
00721 std::cout << info.length << " characters have been parsed successfully." << std::endl;
00722 #endif
00723
00724 std::cout << "Number of characters: " << scanner.number_of_characters() << '\n';
00725 std::cout << "Number of lines: " << scanner.line() << '\n';
00726
00727 std::cout << "Real time : " << end_time_real << " seconds.\n";
00728 std::cout << "Process time : " << end_time_process << " seconds.\n";
00729 std::cout << "Run time read_thread : " << end_time_thread << " seconds.\n";
00730
00731 double t = end_time_thread.tv_sec + end_time_thread.tv_nsec * 1e-9;
00732 std::cout << "Speed: " << (scanner.number_of_characters() / t / 1048576) << " MB/s." << std::endl;
00733
00734 M_processing_finished.emit();
00735 }
00736
00737 void DatabaseSeekable::processing_finished(void)
00738 {
00739 assert(M_buffer->closed());
00740 delete M_buffer;
00741 M_buffer = NULL;
00742 M_slot_open_finished(M_bytes_read);
00743 }
00744
00745 }
00746 }