To count how many times each word appears in an image (for indexing purposes, for example), you need to visit every OcrWord object on the page and read its Text property. This can be accomplished with the following code:
C#
// Maps each distinct word text to the number of times it was seen on the page.
Dictionary<string, int> Words;

/// <summary>
/// Rebuilds <c>Words</c> from scratch with the occurrence count of every
/// word found in the text regions of the given OCR page.
/// </summary>
/// <param name="ocrPage">The recognized page whose regions are scanned.</param>
private void GatherWords(Atalasoft.Ocr.OcrPage ocrPage)
{
    // Start from an empty table so counts from a previous call do not carry over.
    Words = new Dictionary<string, int>();

    foreach (OcrRegion region in ocrPage.Regions)
    {
        // Regions that are not OcrTextRegion are skipped.
        OcrTextRegion textregion = region as OcrTextRegion;
        if (textregion == null)
        {
            continue;
        }

        foreach (OcrLine line in textregion.Lines)
        {
            foreach (OcrWord word in line.Words)
            {
                string text = word.Text;

                // TryGetValue leaves count at 0 when the word has not been seen yet,
                // so a single lookup covers both the "new" and "existing" cases.
                int count;
                Words.TryGetValue(text, out count);
                Words[text] = count + 1;
            }
        }
    }
}
VB.NET
' Maps each distinct word text to the number of times it was seen on the page.
Private Words As Dictionary(Of String, Integer)

''' <summary>
''' Rebuilds Words from scratch with the occurrence count of every word
''' found in the text regions of the given OCR page.
''' </summary>
''' <param name="ocrPage">The recognized page whose regions are scanned.</param>
Private Sub GatherWords(ByVal ocrPage As Atalasoft.Ocr.OcrPage)
    ' Start from an empty table so counts from a previous call do not carry over.
    Words = New Dictionary(Of String, Integer)()

    For Each region As OcrRegion In ocrPage.Regions
        ' Regions that are not OcrTextRegion are skipped.
        Dim textregion As OcrTextRegion = TryCast(region, OcrTextRegion)
        If textregion Is Nothing Then
            Continue For
        End If

        For Each line As OcrLine In textregion.Lines
            For Each word As OcrWord In line.Words
                Dim text As String = word.Text

                ' TryGetValue leaves count at 0 when the word has not been seen yet,
                ' so a single lookup covers both the "new" and "existing" cases.
                Dim count As Integer = 0
                Words.TryGetValue(text, count)
                Words(text) = count + 1
            Next
        Next
    Next
End Sub
Original Article:
Q10244 - HOWTO: OCR to get the total number of each word in an image