linux/arch/arc/lib/memset-archs.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-only */
   2/*
   3 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
   4 */
   5
   6#include <linux/linkage.h>
   7#include <asm/cache.h>
   8
   9/*
  10 * The memset implementation below is optimized to use the prefetchw and
  11 * prealloc instructions on CPUs with a 64B L1 data cache line
  12 * (L1_CACHE_SHIFT == 6). If you want to implement an optimized memset for the
  13 * other possible L1 data cache line lengths (32B and 128B), you should rewrite
  14 * the code, carefully checking that we don't issue any prefetchw/prealloc
  15 * instruction for L1 cache lines which don't belong to the memset area.
  16 */
  17
  18#if L1_CACHE_SHIFT == 6
  19
  20.macro PREALLOC_INSTR   reg, off
    /* L1_CACHE_SHIFT == 6 build: emit a real prealloc for address reg + off */
  21        prealloc        [\reg, \off]
  22.endm
  23
  24.macro PREFETCHW_INSTR  reg, off
    /* L1_CACHE_SHIFT == 6 build: emit a real prefetchw for address reg + off */
  25        prefetchw       [\reg, \off]
  26.endm
  27
  28#else
  29
  30.macro PREALLOC_INSTR   reg, off
    /* L1_CACHE_SHIFT != 6: expands to nothing, so call sites need no conditional */
  31.endm
  32
  33.macro PREFETCHW_INSTR  reg, off
    /* L1_CACHE_SHIFT != 6: expands to nothing, so call sites need no conditional */
  34.endm
  35
  36#endif
  37
  38ENTRY_CFI(memset)
    /*
     * memset - fill a buffer with one byte value.
     *
     * In:  r0 = destination, r1 = fill value (only the low byte is stored),
     *      r2 = length in bytes
     * Out: r0 = destination; r0 is never written, r3 is the running pointer
     * Modifies r1-r5, lp_count and the flags.
     *
     * Lengths up to 8 are stored byte by byte.  Longer fills byte-store up
     * to a 4-byte boundary, replicate the fill byte across r4 and r5, store
     * 64-byte and then 32-byte blocks, and finish the tail byte by byte.
     */
  39        PREFETCHW_INSTR r0, 0   ; Prefetch the first write location
  40        mov.f   0, r2           ; flags from len only; the result is discarded
  41;;; if size is zero, return right away
  42        jz.d    [blink]         ; .d: the next instruction is the delay slot
  43        mov     r3, r0          ; don't clobber ret val
  44
  45;;; if length <= 8 (brls: unsigned lower-or-same), store it byte by byte
  46        brls.d.nt       r2, 8, .Lsmallchunk
  47        mov.f   lp_count,r2     ; delay slot: lp_count = len, Z clear for lpnz
  48
    ;;; Store single bytes until the destination is 4-byte aligned
  49        and.f   r4, r0, 0x03    ; r4 = dest & 3; Z if already aligned
  50        rsub    lp_count, r4, 4 ; lp_count = 4 - r4 bytes up to the boundary
  51        lpnz    @.Laligndestination     ; Z set: skip the loop
  52        ;; LOOP BEGIN
  53        stb.ab  r1, [r3,1]
  54        sub     r2, r2, 1       ; one byte fewer left to set
  55.Laligndestination:
  56
  57;;; Destination is aligned: copy the fill byte into every byte of r4 and r5
  58        and     r1, r1, 0xFF    ; keep the fill byte only
  59        asl     r4, r1, 8
  60        or      r4, r4, r1      ; r4 = fill byte twice (16 bits)
  61        asl     r5, r4, 16
  62        or      r5, r5, r4      ; r5 = fill byte four times (32 bits)
  63        mov     r4, r5          ; r4 = r5: std (LL64 build) stores the r4/r5 pair
  64
    ;;; Split r2 (bytes still to set) into 64-byte loop passes and a tail:
    ;;;   r2 >  64: lp_count = r2 - 64, r2 = (r2 & 63) + 64   (64..127)
    ;;;   r2 <= 64: lp_count = 0,       r2 unchanged
    ;;; Holding 64 bytes back presumably keeps the line named by the
    ;;; prealloc at [r3, 64] inside the area being set - confirm before
    ;;; changing these constants.
  65        sub3    lp_count, r2, 8 ; lp_count = r2 - (8 << 3)
  66        cmp     r2, 64
  67        bmsk.hi r2, r2, 5       ; keep bits 5..0 of r2
  68        mov.ls  lp_count, 0
  69        add3.hi r2, r2, 8       ; r2 += 8 << 3
  70
  71;;; Convert the byte count to 64-byte blocks; each loop pass stores one
  72        lsr.f   lp_count, lp_count, 6   ; Z: no full block, lpnz skips
  73
  74        lpnz    @.Lset64bytes
  75        ;; LOOP START
  76        PREALLOC_INSTR  r3, 64  ; alloc next line w/o fetching
  77
  78#ifdef CONFIG_ARC_HAS_LL64
  79        std.ab  r4, [r3, 8]     ; 8 stores x 8 bytes = 64
  80        std.ab  r4, [r3, 8]
  81        std.ab  r4, [r3, 8]
  82        std.ab  r4, [r3, 8]
  83        std.ab  r4, [r3, 8]
  84        std.ab  r4, [r3, 8]
  85        std.ab  r4, [r3, 8]
  86        std.ab  r4, [r3, 8]
  87#else
  88        st.ab   r4, [r3, 4]     ; 16 stores x 4 bytes = 64
  89        st.ab   r4, [r3, 4]
  90        st.ab   r4, [r3, 4]
  91        st.ab   r4, [r3, 4]
  92        st.ab   r4, [r3, 4]
  93        st.ab   r4, [r3, 4]
  94        st.ab   r4, [r3, 4]
  95        st.ab   r4, [r3, 4]
  96        st.ab   r4, [r3, 4]
  97        st.ab   r4, [r3, 4]
  98        st.ab   r4, [r3, 4]
  99        st.ab   r4, [r3, 4]
 100        st.ab   r4, [r3, 4]
 101        st.ab   r4, [r3, 4]
 102        st.ab   r4, [r3, 4]
 103        st.ab   r4, [r3, 4]
 104#endif
 105.Lset64bytes:
 106
 107        lsr.f   lp_count, r2, 5 ; at most 127 bytes left: count 32-byte blocks
 108        lpnz    .Lset32bytes
 109        ;; LOOP START
 110#ifdef CONFIG_ARC_HAS_LL64
 111        std.ab  r4, [r3, 8]     ; 4 stores x 8 bytes = 32
 112        std.ab  r4, [r3, 8]
 113        std.ab  r4, [r3, 8]
 114        std.ab  r4, [r3, 8]
 115#else
 116        st.ab   r4, [r3, 4]     ; 8 stores x 4 bytes = 32
 117        st.ab   r4, [r3, 4]
 118        st.ab   r4, [r3, 4]
 119        st.ab   r4, [r3, 4]
 120        st.ab   r4, [r3, 4]
 121        st.ab   r4, [r3, 4]
 122        st.ab   r4, [r3, 4]
 123        st.ab   r4, [r3, 4]
 124#endif
 125.Lset32bytes:
 126
 127        and.f   lp_count, r2, 0x1F ; at most 31 tail bytes left
 128.Lsmallchunk:                   ; len <= 8 enters here with lp_count = len
 129        lpnz    .Lcopy3bytes    ; Z set: nothing left, skip the loop
 130        ;; LOOP START
 131        stb.ab  r1, [r3, 1]
 132.Lcopy3bytes:                   ; name aside, the loop above stores up to 31 bytes
 133
 134        j       [blink]         ; return; r0 still holds the destination
 135
 136END_CFI(memset)
 137
 138ENTRY_CFI(memzero)
    /*
     * memzero - zero a buffer by tail-calling memset.
     * In: r0 = destination, r1 = length in bytes.
     */
 139    ; adjust bzero args to memset args
 140    mov r2, r1          ; length becomes the third memset argument (r2)
 141    b.d  memset    ;tail call, so no need to tinker with blink
 142    mov r1, 0           ; delay slot: fill value 0 in r1
 143END_CFI(memzero)
 144