Ibrahim-10


Hi,

I am using the Microsoft Office 2003 Document Imaging libraries with Visual Studion 2005. I have the following VB.NET code which processes many tiff files and then at some point it comes up with exception. At which point, it corrupts something because i can not use the VS IDE to open the main form in design mode saying that it encountered an error most likely indication that memory is corrupt.

In any case, i have the MODI from 2003 version 11 installed and used the toolbox and added a reference to my form. The form contains a button which starts a new thread to process files. The part of the processing of files which requires looking into tiff file content requires calling this function below. The code is expected to run through 9749 TIFF files, it comes up with exception on my laptop running windows XP Office 2003. When i use Office 2007, this does not happen (i.e. i don't get the exception). Is there a difference between MS Office Document imaging in 2003 and in 2007 that could cause memory corruption problem I have included the excerpt from the code that does the modi processing, i think that i may also not be releasing COM object handles in the right order. Can anyone help in this forum or do i need to post to another

Out of memory exception on Set_Document of the MODI document object.

What i suspect is that because i don't correct the object handles correctly the first time an image runs through this code, the consequtive times for the remaining images, it fails on tiffDoc.Create(fileName). Any help would be appreciated.

Ibrahim at bramoin@yahoo.com

Code Block

Public Sub TiffEvaluate(ByVal fileName As String, _
ByVal startingPageNum As String, _
ByVal endingPageNum As String, _
ByVal ocrCheck As Boolean) Handles loadFileCheck.TiffEvaluate

CheckTiffImage(modiTiffView, fileName, startingPageNum, endingPageNum, ocrCheck)

End Sub


' This routine is called from another thread, that explains the Me.Invoke logic.
' The function of the remainder of the logic is to use the MODI object and read
' the tiff image compression and after telling it to OCR the document search the
' resultant text string for text matching those specified by starting and ending
' page numbers. These two strings will be different for multiple image TIFF files.
'
' SPECIAL CONSIDERATIONS:
' The OCR software in MODI will sometimes see '1' as 'I'. As a result, we
' need to search for both 'something1' and 'somethingI'
Private Sub CheckTiffImage(ByVal uiElement As Control, _
ByVal fileName As String, _
ByVal startingPageNum As String, _
ByVal endingPageNum As String, _
ByVal ocrCheck As Boolean)

Dim tiffFile As String = fileName
Dim startPage As String = startingPageNum
Dim endPage As String = endingPageNum

If uiElement.InvokeRequired Then
Dim d As New CheckTiffImageCallback(AddressOf CheckTiffImage)
Try
Me.Invoke(d, New Object() {uiElement, tiffFile, startPage, endPage, ocrCheck})
Catch ex As Exception

End Try

Else

Dim result As Boolean = False
Dim left As Integer = 0
Dim top As Integer = 0
Dim tiffDoc As MODI.Document = Nothing
Dim tiffImages As MODI.Images = Nothing
Dim imageLayout As MODI.Layout = Nothing

Try

' the following lines are for ui update to let the user know what is going on.
tiffDocumentCount += 1
lblTIFFDocCount.Text = tiffDocumentCount.ToString()
lblTIFFDocNumber.Text = startPage
lblTiffBoxNumber.Text = tiffFile.Substring(tiffFile.IndexOf("BOX"), _
(tiffFile.LastIndexOfAny("\") - tiffFile.IndexOf("BOX")))


' CAUTION: There are two ways to load the MODI ActiveX Viewer object:
' one way is to assign the filename of the viewer object, that proved buggy since
' the document object is realized when the image is displayed. And because the display
' of the image immediately after the Show method below is not garanteed, it caused
' file mis-read.
' In our implementation, below we create the document first and then assign it to the object.
' This seems to bring the memory usage of the application and the IDE environment down to a
' more stable level.
tiffDoc = New MODI.Document
If Not tiffDoc Is Nothing Then

' opens the tiff file
tiffDoc.Create(tiffFile)

' assigns the MODI viewer object the document object created
modiTiffView.Document = tiffDoc

' displays the MODI viewer object
modiTiffView.DocViewMode = MODI.MiDocviewMode.miDOCVIEWMODE_SINGLEPAGEVIEW
modiTiffView.FitMode = MODI.MiFITMODE.miByWindow
modiTiffView.Show()

tiffImages = tiffDoc.Images

' count of images in the tiff file
Dim imageIndex As Integer = 0

' utility for storing part of a string
Dim strItems As String()

' number representing the current page
Dim currentPageNum As String = ""
Dim docNum As Integer = 0

Dim imageEnum As IEnumerator = tiffImages.GetEnumerator()
While imageEnum.MoveNext

' get the image in the MODI Document's Images object
Dim image As MODI.Image = imageEnum.Current

If ocrCheck = True Then

' modiTiffView.SetScale(2, 2)

' Adjust the image to view lower right corner in the window
' Dim height As Integer = image.PixelHeight
' Dim width As Integer = image.PixelWidth
' left = width - Panel1.Width
' top = height - Panel1.Height
' modiTiffView.MoveRectIntoView(left, top, width + left, height + top)

If Not image Is Nothing Then

' first check the image compression
If image.Compression <> MODI.MiCOMP_TYPE.miCOMP_TIFF_LZW Then

UpdateMessage(DISPLAY_ID.PROPGRESS, _
"...TIF Compression-Check successful for: <" & _
tiffFile & "> found: <" & _
MODI.MiCOMP_TYPE.GetName(image.Compression.GetType(), image.Compression) _
& "> for image number <" & imageIndex.ToString() & ">.")

Else
UpdateMessage(DISPLAY_ID.PROPGRESS, _
"...TIF Compression-Check processing error: <" & _
tiffFile & "> found: " & image.Compression.ToString() & " compression " & _
"for image number <" & imageIndex.ToString() & ">.")

UpdateMessage(DISPLAY_ID.FINAL_RESULT_ERROR, _
"...TIF Compression-Check processing error: <" & _
tiffFile & "> found: " & image.Compression.ToString() & " compression " & _
"for image number <" & imageIndex.ToString() & ">.")

image = Nothing
Exit While
End If

' This will cause it to OCR the current image
image.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, True, True)

' form the currentPage string, by taking the last part of the
' sartPage string and then adding the imageIndex
strItems = startPage.Split("-")
If strItems.Length > 0 Then

Dim docSequenceNum As String = strItems(strItems.Length - 1)

' converted string will be stored in docNum
If Integer.TryParse(docSequenceNum, docNum) Then

docNum += imageIndex

' the problem here is that the leading zeros are lost
docSequenceNum = String.Format(docSequenceNum, docNum)

' include the dash (that is why we have +1 below)
currentPageNum = startPage.Substring(0, startPage.LastIndexOf("-") + 1)


currentPageNum += docSequenceNum
Else
If tiffImages.Count > 1 Then

UpdateMessage(DISPLAY_ID.FINAL_RESULT_ERROR, _
"...TIF image error: <" & _
tiffFile & "> can not convert docNumber <" & _
currentPageNum & "> for images.")

End If
End If

End If

' Layout is an object created by the OCR process
imageLayout = image.Layout
result = SearchOCRForNumber(imageLayout.Text, currentPageNum)
imageLayout = Nothing

If result = True Then

UpdateMessage(DISPLAY_ID.PROPGRESS, _
"...TIFF image OCR was successfully verified for: <" & _
tiffFile & "> image number <" & imageIndex.ToString() & _
"> and document number <" & currentPageNum & ">.")

tiffOcrCount += 1

Else

' Depending on the orientation, the OCR text may not contain what we
' are looking for. So rotate it 90 degrees, OCR and search again.
image.Rotate(90)

image.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, True, True)

imageLayout = image.Layout
result = SearchOCRForNumber(imageLayout.Text, currentPageNum)
imageLayout = Nothing

If result = True Then

UpdateMessage(DISPLAY_ID.PROPGRESS, _
"...TIF image OCR was successfully verified for: <" & _
tiffFile & "> image number <" & imageIndex.ToString() & _
"> and document number <" & currentPageNum & ">.")


tiffOcrCount += 1
Else

UpdateMessage(DISPLAY_ID.PROPGRESS, _
"...TIF image OCR check failed for: <" & _
tiffFile & "> image number <" & _
imageIndex.ToString() & "> and document number: <" & _
currentPageNum & ">.")

UpdateMessage(DISPLAY_ID.FINAL_RESULT_ERROR, _
"...TIF image OCR check failed for: <" & _
tiffFile & "> image number <" & _
imageIndex.ToString() & "> and document number: <" & _
currentPageNum & ">.")

End If

End If

End If

Application.DoEvents()

' release the reference to the image object
image = Nothing

imageIndex += 1
End If
End While

Else

UpdateMessage(DISPLAY_ID.FINAL_RESULT_ERROR, _
"...Error: MS Office Document Imaging was unable to create a TIFF document for <" & _
tiffFile & ">.")

End If
Catch ex As Exception

UpdateMessage(DISPLAY_ID.FINAL_RESULT_ERROR, _
"An exception occurred: <" & ex.Message & ">, this error happened when " & _
"Procedo was trying to process file: <" & tiffFile & ">.")

Finally


' Do more clean up
If Not imageLayout Is Nothing Then
imageLayout = Nothing
End If

If Not tiffImages Is Nothing Then
For Each tiffImage As MODI.Image In tiffImages
tiffImage = Nothing
Next
End If

If Not tiffImages Is Nothing Then
tiffImages = Nothing
End If

If Not tiffDoc Is Nothing Then

tiffDoc.Close(False) ' False means don't save changes
modiTiffView.FileName = ""
tiffDoc = Nothing

End If

End Try


lblTIFFOcrCount.Text = tiffOcrCount.ToString()


End If

End Sub