Merge pull request #1388 from alculquicondor/ocr

Integrating Google Vision's text recognition in Android
Sibelius Seraphini 2018-03-23 09:26:56 -03:00, committed by GitHub
commit ebe27e0b5b
9 changed files with 331 additions and 7 deletions


@@ -1,6 +1,6 @@
# React Native Camera [![Backers on Open Collective](https://opencollective.com/react-native-camera/backers/badge.svg)](#backers) [![Sponsors on Open Collective](https://opencollective.com/react-native-camera/sponsors/badge.svg)](#sponsors) [![npm version](https://badge.fury.io/js/react-native-camera.svg)](http://badge.fury.io/js/react-native-camera) [![npm downloads](https://img.shields.io/npm/dm/react-native-camera.svg)](https://www.npmjs.com/package/react-native-camera)
The comprehensive camera module for React Native. Including photographs, videos, face detection and barcode scanning!
The comprehensive camera module for React Native. Including photographs, videos, face detection, barcode scanning and text recognition (Android only)!
`import { RNCamera, FaceDetector } from 'react-native-camera';`
@@ -87,7 +87,7 @@ pod 'react-native-camera', path: '../node_modules/react-native-camera'
4. In XCode, in the project navigator, select your project. Add `libRNCamera.a` to your project's `Build Phases` ➜ `Link Binary With Libraries`
5. Click `RNCamera.xcodeproj` in the project navigator and go to the `Build Settings` tab. Make sure 'All' is toggled on (instead of 'Basic'). In the `Search Paths` section, look for `Header Search Paths` and make sure it contains both `$(SRCROOT)/../../react-native/React` and `$(SRCROOT)/../../../React` - mark both as `recursive`.
### Face Detection Steps
### Face Detection or Text Recognition Steps
Face Detection is optional on iOS. If you want it, you will need to install the Google Mobile Vision frameworks in your project, as described in the next section.


@@ -19,7 +19,8 @@ public class CameraViewManager extends ViewGroupManager<RNCameraView> {
EVENT_ON_MOUNT_ERROR("onMountError"),
EVENT_ON_BAR_CODE_READ("onBarCodeRead"),
EVENT_ON_FACES_DETECTED("onFacesDetected"),
EVENT_ON_FACE_DETECTION_ERROR("onFaceDetectionError");
EVENT_ON_FACE_DETECTION_ERROR("onFaceDetectionError"),
EVENT_ON_TEXT_RECOGNIZED("onTextRecognized");
private final String mName;
@@ -138,4 +139,9 @@ public class CameraViewManager extends ViewGroupManager<RNCameraView> {
public void setFaceDetectionClassifications(RNCameraView view, int classifications) {
view.setFaceDetectionClassifications(classifications);
}
@ReactProp(name = "textRecognizerEnabled")
public void setTextRecognizing(RNCameraView view, boolean textRecognizerEnabled) {
view.setShouldRecognizeText(textRecognizerEnabled);
}
}


@@ -17,6 +17,9 @@ import com.facebook.react.bridge.WritableMap;
import com.facebook.react.uimanager.ThemedReactContext;
import com.google.android.cameraview.CameraView;
import com.google.android.gms.vision.face.Face;
import com.google.android.gms.vision.text.Text;
import com.google.android.gms.vision.text.TextBlock;
import com.google.android.gms.vision.text.TextRecognizer;
import com.google.zxing.BarcodeFormat;
import com.google.zxing.DecodeHintType;
import com.google.zxing.MultiFormatReader;
@@ -27,6 +30,8 @@ import org.reactnative.camera.tasks.BarCodeScannerAsyncTaskDelegate;
import org.reactnative.camera.tasks.FaceDetectorAsyncTask;
import org.reactnative.camera.tasks.FaceDetectorAsyncTaskDelegate;
import org.reactnative.camera.tasks.ResolveTakenPictureAsyncTask;
import org.reactnative.camera.tasks.TextRecognizerAsyncTask;
import org.reactnative.camera.tasks.TextRecognizerAsyncTaskDelegate;
import org.reactnative.camera.utils.ImageDimensions;
import org.reactnative.camera.utils.RNFileUtils;
import org.reactnative.facedetector.RNFaceDetector;
@@ -41,7 +46,8 @@ import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
public class RNCameraView extends CameraView implements LifecycleEventListener, BarCodeScannerAsyncTaskDelegate, FaceDetectorAsyncTaskDelegate {
public class RNCameraView extends CameraView implements LifecycleEventListener, BarCodeScannerAsyncTaskDelegate, FaceDetectorAsyncTaskDelegate,
TextRecognizerAsyncTaskDelegate {
private ThemedReactContext mThemedReactContext;
private Queue<Promise> mPictureTakenPromises = new ConcurrentLinkedQueue<>();
private Map<Promise, ReadableMap> mPictureTakenOptions = new ConcurrentHashMap<>();
@@ -55,12 +61,15 @@ public class RNCameraView extends CameraView implements LifecycleEventListener,
// Concurrency lock for scanners to avoid flooding the runtime
public volatile boolean barCodeScannerTaskLock = false;
public volatile boolean faceDetectorTaskLock = false;
public volatile boolean textRecognizerTaskLock = false;
// Scanning-related properties
private final MultiFormatReader mMultiFormatReader = new MultiFormatReader();
private final RNFaceDetector mFaceDetector;
private final TextRecognizer mTextRecognizer;
private boolean mShouldDetectFaces = false;
private boolean mShouldScanBarCodes = false;
private boolean mShouldRecognizeText = false;
private int mFaceDetectorMode = RNFaceDetector.FAST_MODE;
private int mFaceDetectionLandmarks = RNFaceDetector.NO_LANDMARKS;
private int mFaceDetectionClassifications = RNFaceDetector.NO_CLASSIFICATIONS;
@@ -71,6 +80,7 @@ public class RNCameraView extends CameraView implements LifecycleEventListener,
mThemedReactContext = themedReactContext;
mFaceDetector = new RNFaceDetector(themedReactContext);
setupFaceDetector();
mTextRecognizer = new TextRecognizer.Builder(themedReactContext).build();
themedReactContext.addLifecycleEventListener(this);
addCallback(new Callback() {
@@ -121,6 +131,12 @@ public class RNCameraView extends CameraView implements LifecycleEventListener,
FaceDetectorAsyncTaskDelegate delegate = (FaceDetectorAsyncTaskDelegate) cameraView;
new FaceDetectorAsyncTask(delegate, mFaceDetector, data, width, height, correctRotation).execute();
}
if (mShouldRecognizeText && !textRecognizerTaskLock && cameraView instanceof TextRecognizerAsyncTaskDelegate) {
textRecognizerTaskLock = true;
TextRecognizerAsyncTaskDelegate delegate = (TextRecognizerAsyncTaskDelegate) cameraView;
new TextRecognizerAsyncTask(delegate, mTextRecognizer, data, width, height, correctRotation).execute();
}
}
});
}
@@ -145,7 +161,7 @@ public class RNCameraView extends CameraView implements LifecycleEventListener,
@Override
public void onViewAdded(View child) {
if (this.getView() == child || this.getView() == null) return;
// remove and readd view to make sure it is in the back.
// remove and re-add view to make sure it is in the back.
// @TODO figure out why there was a z order issue in the first place and fix accordingly.
this.removeView(this.getView());
this.addView(this.getView(), 0);
@@ -210,7 +226,7 @@ public class RNCameraView extends CameraView implements LifecycleEventListener,
public void setShouldScanBarCodes(boolean shouldScanBarCodes) {
this.mShouldScanBarCodes = shouldScanBarCodes;
setScanning(mShouldDetectFaces || mShouldScanBarCodes);
setScanning(mShouldDetectFaces || mShouldScanBarCodes || mShouldRecognizeText);
}
public void onBarCodeRead(Result barCode) {
@@ -260,7 +276,7 @@ public class RNCameraView extends CameraView implements LifecycleEventListener,
public void setShouldDetectFaces(boolean shouldDetectFaces) {
this.mShouldDetectFaces = shouldDetectFaces;
setScanning(mShouldDetectFaces || mShouldScanBarCodes);
setScanning(mShouldDetectFaces || mShouldScanBarCodes || mShouldRecognizeText);
}
public void onFacesDetected(SparseArray<Face> facesReported, int sourceWidth, int sourceHeight, int sourceRotation) {
@@ -287,6 +303,28 @@ public class RNCameraView extends CameraView implements LifecycleEventListener,
faceDetectorTaskLock = false;
}
public void setShouldRecognizeText(boolean shouldRecognizeText) {
this.mShouldRecognizeText = shouldRecognizeText;
setScanning(mShouldDetectFaces || mShouldScanBarCodes || mShouldRecognizeText);
}
@Override
public void onTextRecognized(SparseArray<TextBlock> textBlocks, int sourceWidth, int sourceHeight, int sourceRotation) {
if (!mShouldRecognizeText) {
return;
}
SparseArray<TextBlock> textBlocksDetected = textBlocks == null ? new SparseArray<TextBlock>() : textBlocks;
ImageDimensions dimensions = new ImageDimensions(sourceWidth, sourceHeight, sourceRotation, getFacing());
RNCameraViewHelper.emitTextRecognizedEvent(this, textBlocksDetected, dimensions);
}
@Override
public void onTextRecognizerTaskCompleted() {
textRecognizerTaskLock = false;
}
@Override
public void onHostResume() {
if (hasCameraPermissions()) {


@@ -16,6 +16,7 @@ import com.facebook.react.bridge.WritableMap;
import com.facebook.react.uimanager.UIManagerModule;
import com.google.android.cameraview.CameraView;
import com.google.android.gms.vision.face.Face;
import com.google.android.gms.vision.text.TextBlock;
import com.google.zxing.Result;
import org.reactnative.camera.events.BarCodeReadEvent;
@@ -23,6 +24,7 @@ import org.reactnative.camera.events.CameraMountErrorEvent;
import org.reactnative.camera.events.CameraReadyEvent;
import org.reactnative.camera.events.FaceDetectionErrorEvent;
import org.reactnative.camera.events.FacesDetectedEvent;
import org.reactnative.camera.events.TextRecognizedEvent;
import org.reactnative.camera.utils.ImageDimensions;
import org.reactnative.facedetector.RNFaceDetector;
@@ -217,6 +219,29 @@ public class RNCameraViewHelper {
reactContext.getNativeModule(UIManagerModule.class).getEventDispatcher().dispatchEvent(event);
}
// Text recognition event
public static void emitTextRecognizedEvent(
ViewGroup view,
SparseArray<TextBlock> textBlocks,
ImageDimensions dimensions) {
float density = view.getResources().getDisplayMetrics().density;
double scaleX = (double) view.getWidth() / (dimensions.getWidth() * density);
double scaleY = (double) view.getHeight() / (dimensions.getHeight() * density);
TextRecognizedEvent event = TextRecognizedEvent.obtain(
view.getId(),
textBlocks,
dimensions,
scaleX,
scaleY
);
ReactContext reactContext = (ReactContext) view.getContext();
reactContext.getNativeModule(UIManagerModule.class).getEventDispatcher().dispatchEvent(event);
}
// Utilities
public static int getCorrectCameraRotation(int rotation, int facing) {


@@ -0,0 +1,157 @@
package org.reactnative.camera.events;
import android.support.v4.util.Pools;
import android.util.SparseArray;
import com.facebook.react.bridge.Arguments;
import com.facebook.react.bridge.ReadableArray;
import com.facebook.react.bridge.ReadableMap;
import com.facebook.react.bridge.WritableArray;
import com.facebook.react.bridge.WritableMap;
import com.facebook.react.uimanager.events.Event;
import com.facebook.react.uimanager.events.RCTEventEmitter;
import com.google.android.cameraview.CameraView;
import com.google.android.gms.vision.text.Line;
import com.google.android.gms.vision.text.Text;
import com.google.android.gms.vision.text.TextBlock;
import org.reactnative.camera.CameraViewManager;
import org.reactnative.camera.utils.ImageDimensions;
import org.reactnative.facedetector.FaceDetectorUtils;
public class TextRecognizedEvent extends Event<TextRecognizedEvent> {
private static final Pools.SynchronizedPool<TextRecognizedEvent> EVENTS_POOL =
new Pools.SynchronizedPool<>(3);
private double mScaleX;
private double mScaleY;
private SparseArray<TextBlock> mTextBlocks;
private ImageDimensions mImageDimensions;
private TextRecognizedEvent() {}
public static TextRecognizedEvent obtain(
int viewTag,
SparseArray<TextBlock> textBlocks,
ImageDimensions dimensions,
double scaleX,
double scaleY) {
TextRecognizedEvent event = EVENTS_POOL.acquire();
if (event == null) {
event = new TextRecognizedEvent();
}
event.init(viewTag, textBlocks, dimensions, scaleX, scaleY);
return event;
}
private void init(
int viewTag,
SparseArray<TextBlock> textBlocks,
ImageDimensions dimensions,
double scaleX,
double scaleY) {
super.init(viewTag);
mTextBlocks = textBlocks;
mImageDimensions = dimensions;
mScaleX = scaleX;
mScaleY = scaleY;
}
@Override
public String getEventName() {
return CameraViewManager.Events.EVENT_ON_TEXT_RECOGNIZED.toString();
}
@Override
public void dispatch(RCTEventEmitter rctEventEmitter) {
rctEventEmitter.receiveEvent(getViewTag(), getEventName(), serializeEventData());
}
private WritableMap serializeEventData() {
WritableArray textBlocksList = Arguments.createArray();
for (int i = 0; i < mTextBlocks.size(); ++i) {
TextBlock textBlock = mTextBlocks.valueAt(i);
WritableMap serializedTextBlock = serializeText(textBlock);
if (mImageDimensions.getFacing() == CameraView.FACING_FRONT) {
serializedTextBlock = rotateTextX(serializedTextBlock);
}
textBlocksList.pushMap(serializedTextBlock);
}
WritableMap event = Arguments.createMap();
event.putString("type", "textBlock");
event.putArray("textBlocks", textBlocksList);
event.putInt("target", getViewTag());
return event;
}
private WritableMap serializeText(Text text) {
WritableMap encodedText = Arguments.createMap();
WritableArray components = Arguments.createArray();
for (Text component : text.getComponents()) {
components.pushMap(serializeText(component));
}
encodedText.putArray("components", components);
encodedText.putString("value", text.getValue());
WritableMap origin = Arguments.createMap();
origin.putDouble("x", text.getBoundingBox().left * this.mScaleX);
origin.putDouble("y", text.getBoundingBox().top * this.mScaleY);
WritableMap size = Arguments.createMap();
size.putDouble("width", text.getBoundingBox().width() * this.mScaleX);
size.putDouble("height", text.getBoundingBox().width() * this.mScaleY);
WritableMap bounds = Arguments.createMap();
bounds.putMap("origin", origin);
bounds.putMap("size", size);
encodedText.putMap("bounds", bounds);
String type_;
if (text instanceof TextBlock) {
type_ = "block";
} else if (text instanceof Line) {
type_ = "line";
} else /*if (text instanceof Element)*/ {
type_ = "element";
}
encodedText.putString("type", type_);
return encodedText;
}
private WritableMap rotateTextX(WritableMap text) {
ReadableMap textBounds = text.getMap("bounds");
ReadableMap oldOrigin = textBounds.getMap("origin");
WritableMap mirroredOrigin = FaceDetectorUtils.positionMirroredHorizontally(
oldOrigin, mImageDimensions.getWidth(), mScaleX);
double translateX = -textBounds.getMap("size").getDouble("width");
WritableMap translatedMirroredOrigin = FaceDetectorUtils.positionTranslatedHorizontally(mirroredOrigin, translateX);
WritableMap newBounds = Arguments.createMap();
newBounds.merge(textBounds);
newBounds.putMap("origin", translatedMirroredOrigin);
text.putMap("bounds", newBounds);
ReadableArray oldComponents = text.getArray("components");
WritableArray newComponents = Arguments.createArray();
for (int i = 0; i < oldComponents.size(); ++i) {
WritableMap component = Arguments.createMap();
component.merge(oldComponents.getMap(i));
rotateTextX(component);
newComponents.pushMap(component);
}
text.putArray("components", newComponents);
return text;
}
}


@@ -0,0 +1,55 @@
package org.reactnative.camera.tasks;
import android.util.SparseArray;
import com.google.android.gms.vision.text.TextBlock;
import com.google.android.gms.vision.text.TextRecognizer;
import org.reactnative.facedetector.RNFrame;
import org.reactnative.facedetector.RNFrameFactory;
public class TextRecognizerAsyncTask extends android.os.AsyncTask<Void, Void, SparseArray<TextBlock>> {
private TextRecognizerAsyncTaskDelegate mDelegate;
private TextRecognizer mTextRecognizer;
private byte[] mImageData;
private int mWidth;
private int mHeight;
private int mRotation;
public TextRecognizerAsyncTask(
TextRecognizerAsyncTaskDelegate delegate,
TextRecognizer textRecognizer,
byte[] imageData,
int width,
int height,
int rotation
) {
mDelegate = delegate;
mTextRecognizer = textRecognizer;
mImageData = imageData;
mWidth = width;
mHeight = height;
mRotation = rotation;
}
@Override
protected SparseArray<TextBlock> doInBackground(Void... ignored) {
if (isCancelled() || mDelegate == null || mTextRecognizer == null || !mTextRecognizer.isOperational()) {
return null;
}
RNFrame frame = RNFrameFactory.buildFrame(mImageData, mWidth, mHeight, mRotation);
return mTextRecognizer.detect(frame.getFrame());
}
@Override
protected void onPostExecute(SparseArray<TextBlock> textBlocks) {
super.onPostExecute(textBlocks);
if (textBlocks != null) {
mDelegate.onTextRecognized(textBlocks, mWidth, mHeight, mRotation);
}
mDelegate.onTextRecognizerTaskCompleted();
}
}


@@ -0,0 +1,10 @@
package org.reactnative.camera.tasks;
import android.util.SparseArray;
import com.google.android.gms.vision.text.TextBlock;
public interface TextRecognizerAsyncTaskDelegate {
void onTextRecognized(SparseArray<TextBlock> textBlocks, int sourceWidth, int sourceHeight, int sourceRotation);
void onTextRecognizerTaskCompleted();
}


@@ -233,6 +233,14 @@ Values: `RNCamera.Constants.FaceDetection.Classifications.all` or `RNCamera.Constants.FaceDetection.Classifications.none`.
Classification is determining whether a certain facial characteristic is present. For example, a face can be classified with regards to whether its eyes are open or closed. Another example is whether the face is smiling or not.
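For example, a minimal sketch of consuming classification results (the `smilingProbability` field name is an assumption based on the face event shape, and it is only populated when classifications are enabled):

```js
// Sketch: enable all classifications and log each face's smiling probability.
// Assumes faces expose `smilingProbability` when Classifications.all is set.
<RNCamera
  faceDetectionClassifications={RNCamera.Constants.FaceDetection.Classifications.all}
  onFacesDetected={({ faces }) => {
    faces.forEach(face => console.log(face.faceID, face.smilingProbability));
  }}
/>
```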
### Text Recognition Related props
Only available on Android. RNCamera uses the Google Mobile Vision framework for text recognition; you can read more about it [here](https://developers.google.com/vision/android/text-overview).
#### `onTextRecognized`
Method to be called when text is detected. Receives a text-recognized event object; its most useful property is `textBlocks`, an array of objects with the [TextBlock](https://developers.google.com/android/reference/com/google/android/gms/vision/text/TextBlock) properties.
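A minimal usage sketch (the handler receives the shape described by the `TrackedTextFeature` type added in this PR; the styling and surrounding component are illustrative):

```js
// Sketch: log every recognized text block and its position (Android only).
<RNCamera
  style={{ flex: 1 }}
  onTextRecognized={({ textBlocks }) => {
    textBlocks.forEach(block => {
      // A block's nested `components` are lines, whose components are elements.
      console.log(block.value, block.bounds.origin, block.bounds.size);
    });
  }}
/>
```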
#### `takePictureAsync([options]): Promise`
Takes a picture, saves it in your app's cache directory, and returns a promise.
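For instance, a hedged sketch (assumes `this.camera` holds a ref to the mounted `RNCamera`; the `quality` and `base64` options are shown for illustration):

```js
// Sketch: capture a photo and log the cached file's URI.
takePicture = async () => {
  if (this.camera) {
    const options = { quality: 0.5, base64: true };
    const data = await this.camera.takePictureAsync(options);
    console.log(data.uri); // path to the photo in the app's cache directory
  }
};
```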


@@ -31,6 +31,22 @@ type TrackedFaceFeature = FaceFeature & {
faceID?: number,
};
type TrackedTextFeature = {
type: string,
bounds: {
size: {
width: number,
height: number,
},
origin: {
x: number,
y: number,
},
},
value: string,
components: Array<TrackedTextFeature>,
};
type RecordingOptions = {
maxDuration?: number,
maxFileSize?: number,
@@ -58,6 +74,7 @@ type PropsType = ViewPropTypes & {
autoFocus?: string | boolean | number,
faceDetectionClassifications?: number,
onFacesDetected?: ({ faces: Array<TrackedFaceFeature> }) => void,
onTextRecognized?: ({ textBlocks: Array<TrackedTextFeature> }) => void,
captureAudio?: boolean,
useCamera2Api?: boolean,
};
@@ -122,6 +139,7 @@ export default class Camera extends React.Component<PropsType> {
onCameraReady: PropTypes.func,
onBarCodeRead: PropTypes.func,
onFacesDetected: PropTypes.func,
onTextRecognized: PropTypes.func,
faceDetectionMode: PropTypes.number,
faceDetectionLandmarks: PropTypes.number,
faceDetectionClassifications: PropTypes.number,
@@ -295,6 +313,7 @@ export default class Camera extends React.Component<PropsType> {
onCameraReady={this._onCameraReady}
onBarCodeRead={this._onObjectDetected(this.props.onBarCodeRead)}
onFacesDetected={this._onObjectDetected(this.props.onFacesDetected)}
onTextRecognized={this._onObjectDetected(this.props.onTextRecognized)}
/>
);
} else if (!this.state.isAuthorizationChecked) {
@@ -315,8 +334,13 @@ export default class Camera extends React.Component<PropsType> {
newProps.faceDetectorEnabled = true;
}
if (props.onTextRecognized) {
newProps.textRecognizerEnabled = true;
}
if (Platform.OS === 'ios') {
delete newProps.ratio;
delete newProps.textRecognizerEnabled;
}
return newProps;
@@ -340,6 +364,7 @@ const RNCamera = requireNativeComponent('RNCamera', Camera, {
accessibilityLiveRegion: true,
barCodeScannerEnabled: true,
faceDetectorEnabled: true,
textRecognizerEnabled: true,
importantForAccessibility: true,
onBarCodeRead: true,
onCameraReady: true,