Transym OCR wrapper functions for Harbour

User avatar
reinaldocrespo
Posts: 918
Joined: Thu Nov 17, 2005 5:49 pm
Location: Fort Lauderdale, FL

Transym OCR wrapper functions for Harbour

Post by reinaldocrespo »

Hello everyone;

I finally got Transym OCR to work from API using (x)Harbour. Here is the code in case anyone is interested in the future.

Code: Select all

//Reinaldo Crespo-Bazan 3/28/2017 6:39:11 PM
//reinaldo.crespo@gmail.com
//reinaldo.crespo@structuredsystems.com
//This is sample code and wrapper functions to use Transym OCR engine.
//www.Transym.com


#define TOCRJOBTYPE_TIFFFILE 0      //  specifies a tiff file
// Bits of the options mask passed as 3rd parameter to OCRFromFileUsingTransym().
// The C wrapper tests bit 0 for MergeBreakOff and bit 1 for DeskewOff, so the
// values must be 0x01 and 0x02 (0x10 would be bit 4 and would be ignored).
#define TOCRPROCESS_OPTIONS_MERGE  0x00000001  // bit 0: sets ProcessOptions.MergeBreakOff
#define TOCRPROCESS_OPTIONS_DESKEW 0x00000002  // bit 1: sets ProcessOptions.DeskewOff

// Sample entry point: runs the OCR wrapper twice on sample.tif and shows
// each result in a message box.
FUNCTION MAIN() 
LOCAL TestTifFile := "sample.tif"

   IF !FILE( TestTifFile ) 
      Alert( "testfile sample.tif not found" ) 
      return NIL 
   ENDIF 

   // First pass: default job type and default (zero) options mask.
   MsgInfo( OCRFromFileUsingTransym( TestTifFile ), "TOCR dll results using default options" ) 

   //I find MergeBreakOff and DeskewOff set (merge/break and deskew disabled) works better for some documents.
   // The title is the 2nd argument of MsgInfo(), so the call to the wrapper
   // must be closed before it. hb_bitOr() is used instead of the "|"
   // operator, which only xHarbour accepts.
   MsgInfo( OCRFromFileUsingTransym( TestTifFile, ;
               TOCRJOBTYPE_TIFFFILE, ;
               hb_bitOr( TOCRPROCESS_OPTIONS_DESKEW, TOCRPROCESS_OPTIONS_MERGE ) ), ;
            "TOCR dll results with Merge and Deskew OFF" ) 
   
RETURN NIL 

//-----------------------------------------------------------
//-------------------------------------------------------------------------------------
/**/
#pragma BEGINDUMP
#include <hbapi.h>
#include <windows.h>
#include <TOCRdll.h>
#include <TOCRuser.h>
#include <TOCRerrs.h>

// Forward declarations of the C helpers defined further down in this dump,
// so that HB_FUNC( OCRFROMFILEUSINGTRANSYM ) can call them before their bodies.
// Submits the job described by JobInfo2 and waits for it; TRUE when it ends as DONE.
BOOL OCRWait( long JobNo, TOCRJOBINFO2 JobInfo2 );
// Retrieves the extended results of a job into a newly allocated buffer (*Results).
BOOL GetResults( long JobNo, TOCRRESULTSEX ** Results );
// Copies the recognised characters of Results into Msg as a NUL-terminated string.
BOOL FormatResults( TOCRRESULTSEX * Results, char * Msg );

//--------------------------------------------------------
//parameters 
// 1. input file with image 
// 2. type of file to ocr defaults to TIFF 
// 3. options mask defaults to 0
//returns OCRed text 

HB_FUNC( OCRFROMFILEUSINGTRANSYM )
{
   TOCRJOBINFO2 JobInfo2;
   TOCRRESULTSEX  * Results = 0;
   long            Status;
   long            JobNo;
   byte            OptionsMask = ISNUM( 3 ) ? hb_parnl( 3 ) : 0x00000000 ;
   char            Msg[8192];
   char * InputFile = ( char * ) hb_parcx( 1 );   //parm 1 is input file

   //Sets Transym to print error to show errors on screen dialog
   TOCRSetConfig( TOCRCONFIG_DEFAULTJOB, TOCRCONFIG_DLL_ERRORMODE, TOCRERRORMODE_MSGBOX );

   memset( &JobInfo2, 0, sizeof( TOCRJOBINFO2 ) );

   JobInfo2.JobType = ISNUM( 2 ) ? hb_parni( 2 ) : TOCRJOBTYPE_TIFFFILE ;
   JobInfo2.InputFile = InputFile ;

   //if MergeBreakOff and DeskewOff parameters are sent then change options accordingly.
   JobInfo2.ProcessOptions.MergeBreakOff = OptionsMask & 0x00000001 ;   //1 bit MergeBreakOff
   JobInfo2.ProcessOptions.DeskewOff = OptionsMask>>1 & 0x00000001 ;    //2nd bit DeskewOff

   Status = TOCRInitialise( &JobNo );

   if ( Status == TOCR_OK ) {
      if ( OCRWait( JobNo, JobInfo2 ) ) {
         if ( GetResults( JobNo, &Results ) ) {

            FormatResults( Results, Msg );
            hb_xfree( Results );
         }
      }

      TOCRShutdown( JobNo );
   }
   hb_retc( Msg ) ;
}

//--------------------------------------------------------
// Submits JobInfo2 to job slot JobNo with TOCRDoJob2 and, when that call
// succeeds, waits on it with TOCRWaitForJob.
// Returns TRUE only when both calls report TOCR_OK and the job status is
// TOCRJOBSTATUS_DONE; FALSE otherwise.
BOOL OCRWait( long JobNo, TOCRJOBINFO2 JobInfo2 )
{
    char    StatusText[4096];
    long    DllErrorMode;
    long    JobState;
    long    Rc = TOCRDoJob2( JobNo, &JobInfo2 );

    if ( Rc == TOCR_OK )
        Rc = TOCRWaitForJob( JobNo, &JobState );

    // JobState is only inspected when the wait call itself succeeded.
    if ( Rc != TOCR_OK || JobState != TOCRJOBSTATUS_DONE ) {
        // Something has gone wrong: fetch the error message unless the OCR
        // engine is configured to display it itself. The text is not used
        // for anything yet.
        TOCRGetConfig( JobNo, TOCRCONFIG_DLL_ERRORMODE, &DllErrorMode );
        if ( DllErrorMode == TOCRERRORMODE_NONE )
            TOCRGetJobStatusMsg( JobNo, StatusText );
        return FALSE;
    }

    return TRUE;
} // OCRWait()


//--------------------------------------------------------
// Get the results from TOCR
BOOL getresults(long JobNo, long mode, void **Results)
{
   long                Status;
   long                ResultsInf;
   char                Msg[4096];

   Status = TOCRGetJobResultsEx(JobNo, mode, &ResultsInf, 0);

   if ( Status != TOCR_OK ) return FALSE;

   if ( ResultsInf > 0 ) {
      // Allocate memory for results
      *Results = ( char * ) hb_xgrab( ResultsInf + 1 );

      // Retrieve the results
      Status = TOCRGetJobResultsEx(JobNo, mode, &ResultsInf, *Results);
      if ( Status != TOCR_OK ) {
         hb_xfree( Results );
         *Results = 0;
         return FALSE;
      }
   } else return FALSE ;

   return TRUE;
} // getresults()


//--------------------------------------------------------
// Typed front end for getresults(): asks for TOCRGETRESULTS_EXTENDED results
// and hands the buffer back through a TOCRRESULTSEX pointer.
BOOL GetResults( long JobNo, TOCRRESULTSEX **Results )
{
    void ** RawResults = (void **) Results;

    return getresults( JobNo, TOCRGETRESULTS_EXTENDED, RawResults );
} // GetResults()


//--------------------------------------------------------
// Convert extended results to a string
//
// Results - extended results to read; may be NULL
// Msg     - destination buffer; the caller must provide room for at least
//           Results->Hdr.NumItems + 1 chars (this function cannot check it)
//
// Each item's OCRCha is narrowed to char and '\r' is replaced by '\n'.
// Msg is always left NUL-terminated: it is an empty string when there is
// nothing to convert. Returns TRUE when at least one item was copied.
BOOL FormatResults(TOCRRESULTSEX *Results, char *Msg)
{
   long            ItemNo;
   long            NumItems;

   if ( Msg == NULL )
      return FALSE;

   // Terminate up front so callers never read an uninitialised buffer.
   Msg[0] = 0;

   if ( Results == NULL || Results->Hdr.NumItems <= 0 )
      return FALSE;

   NumItems = Results->Hdr.NumItems;
   for ( ItemNo = 0; ItemNo < NumItems; ItemNo ++ ) {
      if ( Results->Item[ItemNo].OCRCha == '\r' )
         Msg[ItemNo] = '\n';
      else
         Msg[ItemNo] = (char)Results->Item[ItemNo].OCRCha;
   }
   Msg[NumItems] = 0;

   return TRUE;
} // FormatResults()

#pragma ENDDUMP  
Reinaldo.
User avatar
Silvio.Falconi
Posts: 4956
Joined: Thu Oct 18, 2012 7:17 pm

Re: Transym OCR wrapper functions for Harbour

Post by Silvio.Falconi »

run with demo Evaluate TOCR ?

What do I have to do to create a test sample?
I use : FiveWin for Harbour August 2020 (Revision) - Harbour 3.2.0dev (r1712141320) - Bcc7.30 - xMate ver. 1.15.3 - PellesC
User avatar
reinaldocrespo
Posts: 918
Joined: Thu Nov 17, 2005 5:49 pm
Location: Fort Lauderdale, FL

Re: Transym OCR wrapper functions for Harbour

Post by reinaldocrespo »

Hi Silvio;

Transym has a demo that runs for 30 days (I think). You can download it from the webpage. After creating the lib from the dll you only have to add the lib to the link script. That's all you have to do to make the project. It is all straightforward. To create a .lib from a .dll use ImpLib.exe, which is found in the Borland bin directory.

http://www.transym.com


Good luck;

Reinaldo.
User avatar
damianodec
Posts: 372
Joined: Wed Jun 06, 2007 2:58 pm
Location: Italia
Contact:

Re: Transym OCR wrapper functions for Harbour

Post by damianodec »

hi Reinaldo,
I'm interested to OCR.
are you using this product?
are there any others product?

any help?

thanks
FiveWin for xHarbour 17.09 - Sep. 2017 - Embarcadero C++ 7.00 for Win32
FWH 64 for Harbour 19.06 (MSVC++) Jun. 2019 - Harbour 3.2.0dev (r1904111533)
Visual Studio 2019 - Pelles C V.8.00.60 (Win64)
User avatar
reinaldocrespo
Posts: 918
Joined: Thu Nov 17, 2005 5:49 pm
Location: Fort Lauderdale, FL

Re: Transym OCR wrapper functions for Harbour

Post by reinaldocrespo »

Hello Mr. Damianoodec;

Yes, I'm using Transym. I used the free Google OCR libs (Leptonica and Tesseract) for years before moving to Transym. Tesseract works pretty well depending on the document being fed, and it can be trained so it gets better with time if documents and fonts are of the same type. The main problem with Tess is that you are pretty much on your own when it comes to Harbour, and it seems Google has released that product after abandoning it.

In all honesty, I don't remember my code although I'm pretty sure I had to write some c. It has been working without my intervention for years and my mind has been somewhere else for a very long time. If you need help, I can try to find time to go back and review my code. But, in short, I do recommend Transym over Tess.

I hope that helps,


Reinaldo.
User avatar
damianodec
Posts: 372
Joined: Wed Jun 06, 2007 2:58 pm
Location: Italia
Contact:

Re: Transym OCR wrapper functions for Harbour

Post by damianodec »

hi Reinaldo,
I'd like understand OCR.
Are you using transym and FWH ?
On the Transym site there are:
TOCR version 5.0, C++ samples code and c# samples code.
inside C++ samples there are a lot of folder and files.
have you any example?

thank you.
FiveWin for xHarbour 17.09 - Sep. 2017 - Embarcadero C++ 7.00 for Win32
FWH 64 for Harbour 19.06 (MSVC++) Jun. 2019 - Harbour 3.2.0dev (r1904111533)
Visual Studio 2019 - Pelles C V.8.00.60 (Win64)
User avatar
reinaldocrespo
Posts: 918
Joined: Thu Nov 17, 2005 5:49 pm
Location: Fort Lauderdale, FL

Re: Transym OCR wrapper functions for Harbour

Post by reinaldocrespo »

Hello Damlanodec;

I just realized I had already shared sample code to test Transym on this same thread. It is the first post on this thread. At the very top.

Now that I look at it; the beauty of that code is that it is a self-contained minimized sample app-code that produces results. Notice on my sample code the wrapper functions you will need and a few other c functions that can help.

Please look at that code and let me know how it works for you. The lib works great.

Thank you.


Reinaldo.
User avatar
damianodec
Posts: 372
Joined: Wed Jun 06, 2007 2:58 pm
Location: Italia
Contact:

Re: Transym OCR wrapper functions for Harbour

Post by damianodec »

hi Reinaldo,
when I try to compile your example with buildx.bat inside the samples folder I get this:

Code: Select all

Embarcadero C++ 7.00 for Win32 Copyright (c) 1993-2015 Embarcadero Technologies, Inc.
ocrreina.c:
Error E2046 ocrreina.prg 35: Bad file name format in include directive
Error E2046 ocrreina.prg 36: Bad file name format in include directive
Error E2046 ocrreina.prg 37: Bad file name format in include directive
Error E2303 ocrreina.prg 39: Type name expected
Error E2303 ocrreina.prg 40: Type name expected
Error E2147 ocrreina.prg 41: 'TOCRRESULTSEX' cannot start a parameter declaration
Error E2451 ocrreina.prg 52: Undefined symbol 'TOCRJOBINFO2' in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2379 ocrreina.prg 52: Statement missing ; in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2451 ocrreina.prg 53: Undefined symbol 'TOCRRESULTSEX' in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2451 ocrreina.prg 53: Undefined symbol 'Results' in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2140 ocrreina.prg 54: Declaration is not allowed here in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2140 ocrreina.prg 55: Declaration is not allowed here in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2140 ocrreina.prg 56: Declaration is not allowed here in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2140 ocrreina.prg 57: Declaration is not allowed here in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2140 ocrreina.prg 58: Declaration is not allowed here in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2451 ocrreina.prg 61: Undefined symbol 'TOCRCONFIG_DEFAULTJOB' in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2451 ocrreina.prg 61: Undefined symbol 'TOCRCONFIG_DLL_ERRORMODE' in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2451 ocrreina.prg 61: Undefined symbol 'TOCRERRORMODE_MSGBOX' in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Warning W8065 ocrreina.prg 61: Call to function 'TOCRSetConfig' with no prototype in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2451 ocrreina.prg 63: Undefined symbol 'JobInfo2' in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2109 ocrreina.prg 63: Not an allowed type in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2451 ocrreina.prg 65: Undefined symbol 'TOCRJOBTYPE_TIFFFILE' in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Warning W8065 ocrreina.prg 72: Call to function 'TOCRInitialise' with no prototype in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2451 ocrreina.prg 74: Undefined symbol 'TOCR_OK' in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Warning W8065 ocrreina.prg 75: Call to function 'OCRWait' with no prototype in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Warning W8065 ocrreina.prg 76: Call to function 'GetResults' with no prototype in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Warning W8065 ocrreina.prg 83: Call to function 'TOCRShutdown' with no prototype in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Warning W8004 ocrreina.prg 86: 'InputFile' is assigned a value that is never used in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Warning W8004 ocrreina.prg 86: 'OptionsMask' is assigned a value that is never used in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Warning W8004 ocrreina.prg 86: 'Status' is assigned a value that is never used in function HB_FUN_OCRFROMFILEUSINGTRANSYM
Error E2303 ocrreina.prg 89: Type name expected
*** 23 errors in Compile ***
* Linking errors *
can you help me?

thank you
FiveWin for xHarbour 17.09 - Sep. 2017 - Embarcadero C++ 7.00 for Win32
FWH 64 for Harbour 19.06 (MSVC++) Jun. 2019 - Harbour 3.2.0dev (r1904111533)
Visual Studio 2019 - Pelles C V.8.00.60 (Win64)
User avatar
Enrico Maria Giordano
Posts: 7355
Joined: Thu Oct 06, 2005 8:17 pm
Location: Roma - Italia
Contact:

Re: Transym OCR wrapper functions for Harbour

Post by Enrico Maria Giordano »

Code: Select all

Error E2046 ocrreina.prg 35: Bad file name format in include directive
Can you show the line 35, please?

EMG
User avatar
damianodec
Posts: 372
Joined: Wed Jun 06, 2007 2:58 pm
Location: Italia
Contact:

Re: Transym OCR wrapper functions for Harbour

Post by damianodec »

hi Enrico,

line 35, 36 and 37

Code: Select all

#include <TOCRdll.h>
#include <TOCRuser.h>
#include <TOCRerrs.h>
FiveWin for xHarbour 17.09 - Sep. 2017 - Embarcadero C++ 7.00 for Win32
FWH 64 for Harbour 19.06 (MSVC++) Jun. 2019 - Harbour 3.2.0dev (r1904111533)
Visual Studio 2019 - Pelles C V.8.00.60 (Win64)
User avatar
damianodec
Posts: 372
Joined: Wed Jun 06, 2007 2:58 pm
Location: Italia
Contact:

Re: Transym OCR wrapper functions for Harbour

Post by damianodec »

yes that files are inside FWH\include with other .ch fwh files (fivewin.ch and others)
FiveWin for xHarbour 17.09 - Sep. 2017 - Embarcadero C++ 7.00 for Win32
FWH 64 for Harbour 19.06 (MSVC++) Jun. 2019 - Harbour 3.2.0dev (r1904111533)
Visual Studio 2019 - Pelles C V.8.00.60 (Win64)
User avatar
damianodec
Posts: 372
Joined: Wed Jun 06, 2007 2:58 pm
Location: Italia
Contact:

Re: Transym OCR wrapper functions for Harbour

Post by damianodec »

hi,
I'm over my error (thank you Enrico), now I get this:

Code: Select all

Compiling...
xHarbour 1.2.3 Intl. (SimpLex) (Build 20161218)
Copyright 1999-2016, http://www.xharbour.org http://www.harbour-project.org/
Compiling 'ocrreina.prg' and generating preprocessed output to 'ocrreina.ppo'...
Generating C source output to 'ocrreina.c'...
Done.
Lines 29, Functions/Procedures 1, pCodes 42
Embarcadero C++ 7.00 for Win32 Copyright (c) 1993-2015 Embarcadero Technologies, Inc.
ocrreina.c:
Turbo Incremental Link 6.70 Copyright (c) 1997-2014 Embarcadero Technologies, Inc.
Error: 'C:\XHARBOUR1709\LIB\TOCRDLL.LIB' contains invalid OMF record, type 0x21 (possibly COFF)
* Linking errors *
FiveWin for xHarbour 17.09 - Sep. 2017 - Embarcadero C++ 7.00 for Win32
FWH 64 for Harbour 19.06 (MSVC++) Jun. 2019 - Harbour 3.2.0dev (r1904111533)
Visual Studio 2019 - Pelles C V.8.00.60 (Win64)
Post Reply